From bc033a09c1da038df796f1b24c78f8f90b31cf2c Mon Sep 17 00:00:00 2001 From: Simon Bein Date: Mon, 15 Apr 2024 17:49:59 +0200 Subject: [PATCH 1/6] Discover if a release is stuck and rollback if necessary Signed-off-by: Simon Bein --- pkg/applications/helmclient/client.go | 20 +++++ pkg/applications/installer.go | 26 +++++++ pkg/applications/providers/template/helm.go | 73 +++++++++++++++++++ pkg/applications/providers/types.go | 6 ++ .../controller.go | 10 +++ 5 files changed, 135 insertions(+) diff --git a/pkg/applications/helmclient/client.go b/pkg/applications/helmclient/client.go index 9a94674f265..021f1215062 100644 --- a/pkg/applications/helmclient/client.go +++ b/pkg/applications/helmclient/client.go @@ -416,6 +416,26 @@ func (h HelmClient) Uninstall(releaseName string) (*release.UninstallReleaseResp return uninstallReleaseResponse, err } +// GetMetadata wraps helms GetMetadata command to be used with our ActionConfig +func (h HelmClient) GetMetadata(releaseName string) (*action.Metadata, error) { + client := action.NewGetMetadata(h.actionConfig) + res, err := client.Run(releaseName) + if err != nil { + return nil, fmt.Errorf("Could not retrieve metadata for release %q: %w", releaseName, err) + } + return res, nil +} + +// Rollback wraps helms Rollback command to be used with our ActionConfig +func (h HelmClient) Rollback(releaseName string) error { + client := action.NewRollback(h.actionConfig) + err := client.Run(releaseName) + if err != nil { + return fmt.Errorf("Could not rollback release %q: %w", releaseName, err) + } + return nil +} + // buildDependencies adds missing repositories and then does a Helm dependency build (i.e. download the chart dependencies // from repositories into "charts" folder). func (h HelmClient) buildDependencies(chartLoc string, auth AuthSettings) (*chart.Chart, error) { diff --git a/pkg/applications/installer.go b/pkg/applications/installer.go index 1b671cbbcbf..253ee6fb597 100644 --- a/pkg/applications/installer.go +++ b/pkg/applications/installer.go @@ -44,6 +44,12 @@ type ApplicationInstaller interface { // Delete function uninstalls the application on the user-cluster and returns an error if the uninstallation has failed. StatusUpdater is guaranteed to be non nil. This is idempotent. Delete(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (util.StatusUpdater, error) + + // IsReleaseStuck determines if a release is stuck. Its main purpose is to detect inconsistent behavior in upstream Application libraries + IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) + + // Rollback rolls an Application back to the previous release + Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error } // ApplicationManager handles the installation / uninstallation of an Application on the user-cluster. @@ -138,3 +144,23 @@ func (a *ApplicationManager) reconcileNamespace(ctx context.Context, log *zap.Su } return nil } + +// IsReleaseStuck determines if a release is stuck. Its main purpose is to detect inconsistent behavior in upstream Application libraries +func (a *ApplicationManager) IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { + templateProvider, err := providers.NewTemplateProvider(ctx, seedClient, a.Kubeconfig, a.ApplicationCache, log, applicationInstallation, a.SecretNamespace) + if err != nil { + return false, fmt.Errorf("failed to initialize template provider: %w", err) + } + + return templateProvider.IsReleaseStuck(applicationInstallation) +} + +// Rollback rolls an Application back to the previous release +func (a *ApplicationManager) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error { + templateProvider, err := providers.NewTemplateProvider(ctx, seedClient, a.Kubeconfig, a.ApplicationCache, log, applicationInstallation, a.SecretNamespace) + if err != nil { + return fmt.Errorf("failed to initialize template provider: %w", err) + } + + return templateProvider.Rollback(applicationInstallation) +} diff --git a/pkg/applications/providers/template/helm.go b/pkg/applications/providers/template/helm.go index 6a07163b6cf..e28f8a95d9c 100644 --- a/pkg/applications/providers/template/helm.go +++ b/pkg/applications/providers/template/helm.go @@ -208,3 +208,76 @@ func getDeployOpts(appDefinition *appskubermaticv1.ApplicationDefinition, appIns // Fallback to default options. return helmclient.NewDeployOpts(false, 0, false, false) } + +// IsReleaseStuck aims to identify if a helm release is stuck. This targets an upstream issue in helm, which has not been resolved. For further details see: +// - https://github.com/helm/helm/issues/7476 +// - https://github.com/helm/helm/issues/4558 +func (h HelmTemplate) IsReleaseStuck(applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { + // if the release was successful, exit early + if applicationInstallation.Status.Conditions[appskubermaticv1.Ready].Status == "True" { + return false, nil + } + // currently we observe the stuck error exclusively with this message. If it does not exist, exit early + if applicationInstallation.Status.Conditions[appskubermaticv1.Ready].Message != "another operation (install/upgrade/rollback) is in progress" { + return false, nil + } + + helmCacheDir, err := util.CreateHelmTempDir(h.CacheDir) + if err != nil { + return false, fmt.Errorf("failed to create helmCacheDir: %w", err) + } + + defer util.CleanUpHelmTempDir(helmCacheDir, h.Log) + restClientGetter := &genericclioptions.ConfigFlags{ + KubeConfig: &h.Kubeconfig, + Namespace: &applicationInstallation.Spec.Namespace.Name, + } + helmClient, err := helmclient.NewClient( + h.Ctx, + restClientGetter, + helmclient.NewSettings(helmCacheDir), + applicationInstallation.Spec.Namespace.Name, + h.Log) + if err != nil { + return false, fmt.Errorf("failed to create helmClient: %w", err) + } + + // retrieve metadata of the latest release + releaseName := getReleaseName(applicationInstallation) + metadata, err := helmClient.GetMetadata(releaseName) + if err != nil { + return false, fmt.Errorf("failed to retrieve metadata for checking if release %q is stuck: %w", releaseName, err) + } + + // if the status of the release is not still pending, exit early + if metadata.Status != "pending-upgrade" { + return false, nil + } + + return true, nil +} + +// Rollback rolls an Application back to the previous release +func (h HelmTemplate) Rollback(applicationInstallation *appskubermaticv1.ApplicationInstallation) error { + helmCacheDir, err := util.CreateHelmTempDir(h.CacheDir) + if err != nil { + return fmt.Errorf("failed to create helmCacheDir: %w", err) + } + + defer util.CleanUpHelmTempDir(helmCacheDir, h.Log) + restClientGetter := &genericclioptions.ConfigFlags{ + KubeConfig: &h.Kubeconfig, + Namespace: &applicationInstallation.Spec.Namespace.Name, + } + helmClient, err := helmclient.NewClient( + h.Ctx, + restClientGetter, + helmclient.NewSettings(helmCacheDir), + applicationInstallation.Spec.Namespace.Name, + h.Log) + if err != nil { + return fmt.Errorf("failed to create helmClient: %w", err) + } + + return helmClient.Rollback(getReleaseName(applicationInstallation)) +} diff --git a/pkg/applications/providers/types.go b/pkg/applications/providers/types.go index be7160e3764..0103d32772b 100644 --- a/pkg/applications/providers/types.go +++ b/pkg/applications/providers/types.go @@ -59,6 +59,12 @@ type TemplateProvider interface { // Uninstall the application. Uninstall(applicationInstallation *appskubermaticv1.ApplicationInstallation) (util.StatusUpdater, error) + + // Check if a Release is stuck + IsReleaseStuck(applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) + + // Rollback the Application to the previous release + Rollback(applicationInstallation *appskubermaticv1.ApplicationInstallation) error } // NewTemplateProvider return the concrete implementation of TemplateProvider according to the templateMethod. diff --git a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go index 72e90236619..a7fecabef9c 100644 --- a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go +++ b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go @@ -242,6 +242,16 @@ func (r *reconciler) handleInstallation(ctx context.Context, log *zap.SugaredLog return nil } + // Because some upstream tools are not completely idempotent, we need a check to make sure a release is not stuck. + // This should be run before we make any changes to the status field, so we can use it in our analysis + stuck, err := r.appInstaller.IsReleaseStuck(ctx, log, r.seedClient, r.userClient, appInstallation) + if err != nil { + return fmt.Errorf("failed to check if the previous release is stuck: %w", err) + } + if stuck { + r.appInstaller.Rollback(ctx, log, r.seedClient, r.userClient, appInstallation) + } + downloadDest, err := os.MkdirTemp(r.appInstaller.GetAppCache(), appInstallation.Namespace+"-"+appInstallation.Name) if err != nil { return fmt.Errorf("failed to create temporary directory where application source will be downloaded: %w", err) From 8425e9615205edb20c0cfe77ac35a271f2f4af05 Mon Sep 17 00:00:00 2001 From: Simon Bein Date: Tue, 16 Apr 2024 12:25:45 +0200 Subject: [PATCH 2/6] linter + noop stubs Signed-off-by: Simon Bein --- pkg/applications/fake/fake_installer.go | 29 +++++++++++++++++++ pkg/applications/helmclient/client.go | 4 +-- pkg/applications/installer.go | 4 +-- pkg/applications/providers/template/helm.go | 2 +- .../controller.go | 4 ++- 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/pkg/applications/fake/fake_installer.go b/pkg/applications/fake/fake_installer.go index ce7196906f6..f46b4337648 100644 --- a/pkg/applications/fake/fake_installer.go +++ b/pkg/applications/fake/fake_installer.go @@ -59,6 +59,15 @@ func (a *ApplicationInstallerRecorder) Delete(ctx context.Context, log *zap.Suga return util.NoStatusUpdate, nil } +func (a *ApplicationInstallerRecorder) IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { + // NOOP + return false, nil +} + +func (a *ApplicationInstallerRecorder) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error { + return nil +} + // ApplicationInstallerLogger is a fake ApplicationInstaller that just logs actions. it's used for the development of the controller. type ApplicationInstallerLogger struct { } @@ -81,6 +90,16 @@ func (a ApplicationInstallerLogger) Delete(ctx context.Context, log *zap.Sugared return util.NoStatusUpdate, nil } +func (a ApplicationInstallerLogger) IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { + // NOOP + return false, nil +} + +func (a ApplicationInstallerLogger) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error { + // NOOP + return nil +} + // CustomApplicationInstaller is an applicationInstaller in which every function can be independently mocked. // If a function is not mocked, then default values are returned. type CustomApplicationInstaller struct { @@ -117,3 +136,13 @@ func (c CustomApplicationInstaller) Delete(ctx context.Context, log *zap.Sugared } return util.NoStatusUpdate, nil } + +func (c CustomApplicationInstaller) IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { + // NOOP + return false, nil +} + +func (c CustomApplicationInstaller) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error { + // NOOP + return nil +} diff --git a/pkg/applications/helmclient/client.go b/pkg/applications/helmclient/client.go index 021f1215062..dc5bd6361c9 100644 --- a/pkg/applications/helmclient/client.go +++ b/pkg/applications/helmclient/client.go @@ -416,7 +416,7 @@ func (h HelmClient) Uninstall(releaseName string) (*release.UninstallReleaseResp return uninstallReleaseResponse, err } -// GetMetadata wraps helms GetMetadata command to be used with our ActionConfig +// GetMetadata wraps helms GetMetadata command to be used with our ActionConfig. func (h HelmClient) GetMetadata(releaseName string) (*action.Metadata, error) { client := action.NewGetMetadata(h.actionConfig) res, err := client.Run(releaseName) @@ -426,7 +426,7 @@ func (h HelmClient) GetMetadata(releaseName string) (*action.Metadata, error) { return res, nil } -// Rollback wraps helms Rollback command to be used with our ActionConfig +// Rollback wraps helms Rollback command to be used with our ActionConfig. func (h HelmClient) Rollback(releaseName string) error { client := action.NewRollback(h.actionConfig) err := client.Run(releaseName) diff --git a/pkg/applications/installer.go b/pkg/applications/installer.go index 253ee6fb597..54ec4b5a01e 100644 --- a/pkg/applications/installer.go +++ b/pkg/applications/installer.go @@ -145,7 +145,7 @@ func (a *ApplicationManager) reconcileNamespace(ctx context.Context, log *zap.Su return nil } -// IsReleaseStuck determines if a release is stuck. Its main purpose is to detect inconsistent behavior in upstream Application libraries +// IsReleaseStuck determines if a release is stuck. Its main purpose is to detect inconsistent behavior in upstream Application libraries. func (a *ApplicationManager) IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { templateProvider, err := providers.NewTemplateProvider(ctx, seedClient, a.Kubeconfig, a.ApplicationCache, log, applicationInstallation, a.SecretNamespace) if err != nil { @@ -155,7 +155,7 @@ func (a *ApplicationManager) IsReleaseStuck(ctx context.Context, log *zap.Sugare return templateProvider.IsReleaseStuck(applicationInstallation) } -// Rollback rolls an Application back to the previous release +// Rollback rolls an Application back to the previous release. func (a *ApplicationManager) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error { templateProvider, err := providers.NewTemplateProvider(ctx, seedClient, a.Kubeconfig, a.ApplicationCache, log, applicationInstallation, a.SecretNamespace) if err != nil { diff --git a/pkg/applications/providers/template/helm.go b/pkg/applications/providers/template/helm.go index e28f8a95d9c..4ba76035bdf 100644 --- a/pkg/applications/providers/template/helm.go +++ b/pkg/applications/providers/template/helm.go @@ -257,7 +257,7 @@ func (h HelmTemplate) IsReleaseStuck(applicationInstallation *appskubermaticv1.A return true, nil } -// Rollback rolls an Application back to the previous release +// Rollback rolls an Application back to the previous release. func (h HelmTemplate) Rollback(applicationInstallation *appskubermaticv1.ApplicationInstallation) error { helmCacheDir, err := util.CreateHelmTempDir(h.CacheDir) if err != nil { diff --git a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go index a7fecabef9c..bd70b9a4d8f 100644 --- a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go +++ b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go @@ -249,7 +249,9 @@ func (r *reconciler) handleInstallation(ctx context.Context, log *zap.SugaredLog return fmt.Errorf("failed to check if the previous release is stuck: %w", err) } if stuck { - r.appInstaller.Rollback(ctx, log, r.seedClient, r.userClient, appInstallation) + if err := r.appInstaller.Rollback(ctx, log, r.seedClient, r.userClient, appInstallation); err != nil { + return fmt.Errorf("failed to rollback release: %w", err) + } } downloadDest, err := os.MkdirTemp(r.appInstaller.GetAppCache(), appInstallation.Namespace+"-"+appInstallation.Name) From 5363a32400dba6d0ae7b0a9f92bdfedd5f9e27c5 Mon Sep 17 00:00:00 2001 From: Simon Bein Date: Tue, 16 Apr 2024 16:16:23 +0200 Subject: [PATCH 3/6] add additional logging Signed-off-by: Simon Bein --- .../application-installation-controller/controller.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go index bd70b9a4d8f..cc561b8af9b 100644 --- a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go +++ b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go @@ -249,9 +249,11 @@ func (r *reconciler) handleInstallation(ctx context.Context, log *zap.SugaredLog return fmt.Errorf("failed to check if the previous release is stuck: %w", err) } if stuck { + log.Infof("Release %q seems to be stuck in pending even after timeout, attempting rollback now") if err := r.appInstaller.Rollback(ctx, log, r.seedClient, r.userClient, appInstallation); err != nil { return fmt.Errorf("failed to rollback release: %w", err) } + log.Infof("Release %q has been rolled back successfully") } downloadDest, err := os.MkdirTemp(r.appInstaller.GetAppCache(), appInstallation.Namespace+"-"+appInstallation.Name) From fed0066876375bf157571c60ed53a064864718dc Mon Sep 17 00:00:00 2001 From: Simon Bein Date: Wed, 17 Apr 2024 10:45:30 +0200 Subject: [PATCH 4/6] rename to isStuck Signed-off-by: Simon Bein --- pkg/applications/fake/fake_installer.go | 6 +++--- pkg/applications/installer.go | 10 +++++----- pkg/applications/providers/template/helm.go | 4 ++-- pkg/applications/providers/types.go | 4 ++-- .../application-installation-controller/controller.go | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pkg/applications/fake/fake_installer.go b/pkg/applications/fake/fake_installer.go index f46b4337648..01659feab69 100644 --- a/pkg/applications/fake/fake_installer.go +++ b/pkg/applications/fake/fake_installer.go @@ -59,7 +59,7 @@ func (a *ApplicationInstallerRecorder) Delete(ctx context.Context, log *zap.Suga return util.NoStatusUpdate, nil } -func (a *ApplicationInstallerRecorder) IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { +func (a *ApplicationInstallerRecorder) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { // NOOP return false, nil } @@ -90,7 +90,7 @@ func (a ApplicationInstallerLogger) Delete(ctx context.Context, log *zap.Sugared return util.NoStatusUpdate, nil } -func (a ApplicationInstallerLogger) IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { +func (a ApplicationInstallerLogger) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { // NOOP return false, nil } @@ -137,7 +137,7 @@ func (c CustomApplicationInstaller) Delete(ctx context.Context, log *zap.Sugared return util.NoStatusUpdate, nil } -func (c CustomApplicationInstaller) IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { +func (c CustomApplicationInstaller) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { // NOOP return false, nil } diff --git a/pkg/applications/installer.go b/pkg/applications/installer.go index 54ec4b5a01e..52ca93bad1c 100644 --- a/pkg/applications/installer.go +++ b/pkg/applications/installer.go @@ -45,8 +45,8 @@ type ApplicationInstaller interface { // Delete function uninstalls the application on the user-cluster and returns an error if the uninstallation has failed. StatusUpdater is guaranteed to be non nil. This is idempotent. Delete(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (util.StatusUpdater, error) - // IsReleaseStuck determines if a release is stuck. Its main purpose is to detect inconsistent behavior in upstream Application libraries - IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) + // IsStuck determines if a release is stuck. Its main purpose is to detect inconsistent behavior in upstream Application libraries + IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) // Rollback rolls an Application back to the previous release Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error @@ -145,14 +145,14 @@ func (a *ApplicationManager) reconcileNamespace(ctx context.Context, log *zap.Su return nil } -// IsReleaseStuck determines if a release is stuck. Its main purpose is to detect inconsistent behavior in upstream Application libraries. -func (a *ApplicationManager) IsReleaseStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { +// IsStuck determines if a release is stuck. Its main purpose is to detect inconsistent behavior in upstream Application libraries. +func (a *ApplicationManager) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { templateProvider, err := providers.NewTemplateProvider(ctx, seedClient, a.Kubeconfig, a.ApplicationCache, log, applicationInstallation, a.SecretNamespace) if err != nil { return false, fmt.Errorf("failed to initialize template provider: %w", err) } - return templateProvider.IsReleaseStuck(applicationInstallation) + return templateProvider.IsStuck(applicationInstallation) } // Rollback rolls an Application back to the previous release. diff --git a/pkg/applications/providers/template/helm.go b/pkg/applications/providers/template/helm.go index 4ba76035bdf..9d13183928b 100644 --- a/pkg/applications/providers/template/helm.go +++ b/pkg/applications/providers/template/helm.go @@ -209,10 +209,10 @@ func getDeployOpts(appDefinition *appskubermaticv1.ApplicationDefinition, appIns return helmclient.NewDeployOpts(false, 0, false, false) } -// IsReleaseStuck aims to identify if a helm release is stuck. This targets an upstream issue in helm, which has not been resolved. For further details see: +// IsStuck aims to identify if a helm release is stuck. This targets an upstream issue in helm, which has not been resolved. For further details see: // - https://github.com/helm/helm/issues/7476 // - https://github.com/helm/helm/issues/4558 -func (h HelmTemplate) IsReleaseStuck(applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { +func (h HelmTemplate) IsStuck(applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) { // if the release was successful, exit early if applicationInstallation.Status.Conditions[appskubermaticv1.Ready].Status == "True" { return false, nil diff --git a/pkg/applications/providers/types.go b/pkg/applications/providers/types.go index 0103d32772b..72134e16925 100644 --- a/pkg/applications/providers/types.go +++ b/pkg/applications/providers/types.go @@ -60,8 +60,8 @@ type TemplateProvider interface { // Uninstall the application. Uninstall(applicationInstallation *appskubermaticv1.ApplicationInstallation) (util.StatusUpdater, error) - // Check if a Release is stuck - IsReleaseStuck(applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) + // Check if a release is stuck + IsStuck(applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) // Rollback the Application to the previous release Rollback(applicationInstallation *appskubermaticv1.ApplicationInstallation) error diff --git a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go index cc561b8af9b..e7bb77223ca 100644 --- a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go +++ b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go @@ -244,7 +244,7 @@ func (r *reconciler) handleInstallation(ctx context.Context, log *zap.SugaredLog // Because some upstream tools are not completely idempotent, we need a check to make sure a release is not stuck. // This should be run before we make any changes to the status field, so we can use it in our analysis - stuck, err := r.appInstaller.IsReleaseStuck(ctx, log, r.seedClient, r.userClient, appInstallation) + stuck, err := r.appInstaller.IsStuck(ctx, log, r.seedClient, r.userClient, appInstallation) if err != nil { return fmt.Errorf("failed to check if the previous release is stuck: %w", err) } From 445db8eaec818370349f0a10cc444159f1018659 Mon Sep 17 00:00:00 2001 From: Simon Bein Date: Wed, 17 Apr 2024 10:46:33 +0200 Subject: [PATCH 5/6] remove timeout from message, as we do not explicitly check for it Signed-off-by: Simon Bein --- .../application-installation-controller/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go index e7bb77223ca..77c875ee603 100644 --- a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go +++ b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go @@ -249,7 +249,7 @@ func (r *reconciler) handleInstallation(ctx context.Context, log *zap.SugaredLog return fmt.Errorf("failed to check if the previous release is stuck: %w", err) } if stuck { - log.Infof("Release %q seems to be stuck in pending even after timeout, attempting rollback now") + log.Infof("Release %q seems to be stuck, attempting rollback now") if err := r.appInstaller.Rollback(ctx, log, r.seedClient, r.userClient, appInstallation); err != nil { return fmt.Errorf("failed to rollback release: %w", err) } From c50c8d71154e48cf288ac0c302451b842a9230c6 Mon Sep 17 00:00:00 2001 From: Simon Bein Date: Wed, 17 Apr 2024 12:58:44 +0200 Subject: [PATCH 6/6] remove printing directive for AppInstall, as we already print it centrally via the logger Signed-off-by: Simon Bein --- .../application-installation-controller/controller.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go index 77c875ee603..6b88d1de63b 100644 --- a/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go +++ b/pkg/controller/user-cluster-controller-manager/application-installation-controller/controller.go @@ -249,11 +249,11 @@ func (r *reconciler) handleInstallation(ctx context.Context, log *zap.SugaredLog return fmt.Errorf("failed to check if the previous release is stuck: %w", err) } if stuck { - log.Infof("Release %q seems to be stuck, attempting rollback now") + log.Infof("Release for ApplicationInstallation seems to be stuck, attempting rollback now") if err := r.appInstaller.Rollback(ctx, log, r.seedClient, r.userClient, appInstallation); err != nil { return fmt.Errorf("failed to rollback release: %w", err) } - log.Infof("Release %q has been rolled back successfully") + log.Infof("Release for ApplicationInstallation has been rolled back successfully") } downloadDest, err := os.MkdirTemp(r.appInstaller.GetAppCache(), appInstallation.Namespace+"-"+appInstallation.Name)