Skip to content

Commit

Permalink
Applications: discover if a release is stuck and rollback if necessary (#13301)
Browse files Browse the repository at this point in the history

* Discover if a release is stuck and rollback if necessary

Signed-off-by: Simon Bein <simontheleg@gmail.com>

* linter + noop stubs

Signed-off-by: Simon Bein <simontheleg@gmail.com>

* add additional logging

Signed-off-by: Simon Bein <simontheleg@gmail.com>

* rename to isStuck

Signed-off-by: Simon Bein <simontheleg@gmail.com>

* remove timeout from message, as we do not explicitly check for it

Signed-off-by: Simon Bein <simontheleg@gmail.com>

* remove printing directive for AppInstall, as we already print it centrally via the logger

Signed-off-by: Simon Bein <simontheleg@gmail.com>

---------

Signed-off-by: Simon Bein <simontheleg@gmail.com>
  • Loading branch information
SimonTheLeg committed Apr 17, 2024
1 parent 2b31c8b commit 9f73c2e
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 0 deletions.
29 changes: 29 additions & 0 deletions pkg/applications/fake/fake_installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ func (a *ApplicationInstallerRecorder) Delete(ctx context.Context, log *zap.Suga
return util.NoStatusUpdate, nil
}

// IsStuck is a no-op for the recorder: it ignores all arguments and always reports that the release
// is not stuck (false) together with a nil error.
func (a *ApplicationInstallerRecorder) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) {
// NOOP
return false, nil
}

// Rollback is a no-op for the recorder: it ignores all arguments, performs no rollback and always
// returns nil.
func (a *ApplicationInstallerRecorder) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error {
// NOOP
return nil
}

// ApplicationInstallerLogger is a fake ApplicationInstaller that just logs actions. It is used for the development of the controller.
type ApplicationInstallerLogger struct {
}
Expand All @@ -81,6 +90,16 @@ func (a ApplicationInstallerLogger) Delete(ctx context.Context, log *zap.Sugared
return util.NoStatusUpdate, nil
}

// IsStuck is a no-op: it ignores all arguments and always reports that the release is not stuck
// (false) together with a nil error.
func (a ApplicationInstallerLogger) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) {
// NOOP
return false, nil
}

// Rollback is a no-op: it ignores all arguments, performs no rollback and always returns nil.
func (a ApplicationInstallerLogger) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error {
// NOOP
return nil
}

// CustomApplicationInstaller is an applicationInstaller in which every function can be independently mocked.
// If a function is not mocked, then default values are returned.
type CustomApplicationInstaller struct {
Expand Down Expand Up @@ -117,3 +136,13 @@ func (c CustomApplicationInstaller) Delete(ctx context.Context, log *zap.Sugared
}
return util.NoStatusUpdate, nil
}

// IsStuck is a no-op: it ignores all arguments and always returns the default values, i.e. false
// (not stuck) and a nil error.
func (c CustomApplicationInstaller) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) {
// NOOP
return false, nil
}

// Rollback is a no-op: it ignores all arguments, performs no rollback and always returns nil.
func (c CustomApplicationInstaller) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error {
// NOOP
return nil
}
20 changes: 20 additions & 0 deletions pkg/applications/helmclient/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,26 @@ func (h HelmClient) Uninstall(releaseName string) (*release.UninstallReleaseResp
return uninstallReleaseResponse, err
}

// GetMetadata runs Helm's "get metadata" action for releaseName using the ActionConfig of this client.
// It returns the metadata reported by Helm, or a wrapped error if the action fails.
func (h HelmClient) GetMetadata(releaseName string) (*action.Metadata, error) {
	metadata, err := action.NewGetMetadata(h.actionConfig).Run(releaseName)
	if err != nil {
		return nil, fmt.Errorf("Could not retrieve metadata for release %q: %w", releaseName, err)
	}

	return metadata, nil
}

// Rollback runs Helm's rollback action for releaseName using the ActionConfig of this client.
// The action is run with its default settings; a failure is returned as a wrapped error.
func (h HelmClient) Rollback(releaseName string) error {
	if err := action.NewRollback(h.actionConfig).Run(releaseName); err != nil {
		return fmt.Errorf("Could not rollback release %q: %w", releaseName, err)
	}

	return nil
}

// buildDependencies adds missing repositories and then does a Helm dependency build (i.e. download the chart dependencies
// from repositories into "charts" folder).
func (h HelmClient) buildDependencies(chartLoc string, auth AuthSettings) (*chart.Chart, error) {
Expand Down
26 changes: 26 additions & 0 deletions pkg/applications/installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ type ApplicationInstaller interface {

// Delete function uninstalls the application on the user-cluster and returns an error if the uninstallation has failed. StatusUpdater is guaranteed to be non nil. This is idempotent.
Delete(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (util.StatusUpdater, error)

// IsStuck determines if a release is stuck. Its main purpose is to detect inconsistent behavior in upstream Application libraries
IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error)

// Rollback rolls an Application back to the previous release
Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error
}

// ApplicationManager handles the installation / uninstallation of an Application on the user-cluster.
Expand Down Expand Up @@ -138,3 +144,23 @@ func (a *ApplicationManager) reconcileNamespace(ctx context.Context, log *zap.Su
}
return nil
}

// IsStuck reports whether the release of the given applicationInstallation is stuck. Its main purpose is to
// detect inconsistent behavior in upstream Application libraries.
//
// The check itself is delegated to the template provider built for this applicationInstallation; an error is
// returned if that provider cannot be initialized or if the provider's own check fails.
func (a *ApplicationManager) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) {
	provider, err := providers.NewTemplateProvider(ctx, seedClient, a.Kubeconfig, a.ApplicationCache, log, applicationInstallation, a.SecretNamespace)
	if err != nil {
		return false, fmt.Errorf("failed to initialize template provider: %w", err)
	}

	stuck, err := provider.IsStuck(applicationInstallation)
	return stuck, err
}

// Rollback rolls the Application described by applicationInstallation back to the previous release.
//
// The rollback itself is delegated to the template provider built for this applicationInstallation; an error
// is returned if that provider cannot be initialized or if the provider's rollback fails.
func (a *ApplicationManager) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error {
	provider, err := providers.NewTemplateProvider(ctx, seedClient, a.Kubeconfig, a.ApplicationCache, log, applicationInstallation, a.SecretNamespace)
	if err != nil {
		return fmt.Errorf("failed to initialize template provider: %w", err)
	}

	rollbackErr := provider.Rollback(applicationInstallation)
	return rollbackErr
}
73 changes: 73 additions & 0 deletions pkg/applications/providers/template/helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,76 @@ func getDeployOpts(appDefinition *appskubermaticv1.ApplicationDefinition, appIns
// Fallback to default options.
return helmclient.NewDeployOpts(false, 0, false, false)
}

// IsStuck reports whether the helm release of applicationInstallation appears to be stuck. It works around an
// upstream issue in helm which has not been resolved. For further details see:
//   - https://github.com/helm/helm/issues/7476
//   - https://github.com/helm/helm/issues/4558
//
// A release is reported as stuck only when all of the following hold:
//   - the Ready condition of applicationInstallation is not "True",
//   - the Ready condition carries the "another operation ... is in progress" message,
//   - the metadata helm returns for the release has the status "pending-upgrade".
//
// An error is returned if the temporary helm directory or the helm client cannot be created, or if the
// release metadata cannot be retrieved.
func (h HelmTemplate) IsStuck(applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) {
	readyCondition := applicationInstallation.Status.Conditions[appskubermaticv1.Ready]

	// A successful release is never stuck, and the stuck state is currently observed exclusively together with
	// this message. In either case we can exit early without talking to helm.
	if readyCondition.Status == "True" || readyCondition.Message != "another operation (install/upgrade/rollback) is in progress" {
		return false, nil
	}

	tmpDir, err := util.CreateHelmTempDir(h.CacheDir)
	if err != nil {
		return false, fmt.Errorf("failed to create helmCacheDir: %w", err)
	}
	defer util.CleanUpHelmTempDir(tmpDir, h.Log)

	client, err := helmclient.NewClient(
		h.Ctx,
		&genericclioptions.ConfigFlags{
			KubeConfig: &h.Kubeconfig,
			Namespace:  &applicationInstallation.Spec.Namespace.Name,
		},
		helmclient.NewSettings(tmpDir),
		applicationInstallation.Spec.Namespace.Name,
		h.Log,
	)
	if err != nil {
		return false, fmt.Errorf("failed to create helmClient: %w", err)
	}

	// Ask helm for the metadata of the latest release.
	releaseName := getReleaseName(applicationInstallation)
	metadata, err := client.GetMetadata(releaseName)
	if err != nil {
		return false, fmt.Errorf("failed to retrieve metadata for checking if release %q is stuck: %w", releaseName, err)
	}

	// Only a release that is still pending its upgrade counts as stuck.
	return metadata.Status == "pending-upgrade", nil
}

// Rollback rolls the helm release of applicationInstallation back to the previous release.
//
// It creates a temporary helm directory (cleaned up again when the function returns), builds a helm client
// for the namespace configured in the applicationInstallation spec and runs the client's rollback for the
// release name derived from applicationInstallation. An error is returned if any of these steps fails.
func (h HelmTemplate) Rollback(applicationInstallation *appskubermaticv1.ApplicationInstallation) error {
	tmpDir, err := util.CreateHelmTempDir(h.CacheDir)
	if err != nil {
		return fmt.Errorf("failed to create helmCacheDir: %w", err)
	}
	defer util.CleanUpHelmTempDir(tmpDir, h.Log)

	client, err := helmclient.NewClient(
		h.Ctx,
		&genericclioptions.ConfigFlags{
			KubeConfig: &h.Kubeconfig,
			Namespace:  &applicationInstallation.Spec.Namespace.Name,
		},
		helmclient.NewSettings(tmpDir),
		applicationInstallation.Spec.Namespace.Name,
		h.Log,
	)
	if err != nil {
		return fmt.Errorf("failed to create helmClient: %w", err)
	}

	releaseName := getReleaseName(applicationInstallation)
	return client.Rollback(releaseName)
}
6 changes: 6 additions & 0 deletions pkg/applications/providers/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ type TemplateProvider interface {

// Uninstall the application.
Uninstall(applicationInstallation *appskubermaticv1.ApplicationInstallation) (util.StatusUpdater, error)

// Check if a release is stuck
IsStuck(applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error)

// Rollback the Application to the previous release
Rollback(applicationInstallation *appskubermaticv1.ApplicationInstallation) error
}

// NewTemplateProvider return the concrete implementation of TemplateProvider according to the templateMethod.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,20 @@ func (r *reconciler) handleInstallation(ctx context.Context, log *zap.SugaredLog
return nil
}

// Because some upstream tools are not completely idempotent, we need a check to make sure a release is not stuck.
// This should be run before we make any changes to the status field, so we can use it in our analysis
stuck, err := r.appInstaller.IsStuck(ctx, log, r.seedClient, r.userClient, appInstallation)
if err != nil {
return fmt.Errorf("failed to check if the previous release is stuck: %w", err)
}
if stuck {
log.Infof("Release for ApplicationInstallation seems to be stuck, attempting rollback now")
if err := r.appInstaller.Rollback(ctx, log, r.seedClient, r.userClient, appInstallation); err != nil {
return fmt.Errorf("failed to rollback release: %w", err)
}
log.Infof("Release for ApplicationInstallation has been rolled back successfully")
}

downloadDest, err := os.MkdirTemp(r.appInstaller.GetAppCache(), appInstallation.Namespace+"-"+appInstallation.Name)
if err != nil {
return fmt.Errorf("failed to create temporary directory where application source will be downloaded: %w", err)
Expand Down

0 comments on commit 9f73c2e

Please sign in to comment.