Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Applications: discover if a release is stuck and rollback if necessary #13301

Merged
merged 6 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
29 changes: 29 additions & 0 deletions pkg/applications/fake/fake_installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ func (a *ApplicationInstallerRecorder) Delete(ctx context.Context, log *zap.Suga
return util.NoStatusUpdate, nil
}

// IsStuck is a no-op on the recorder: it always reports that the release is
// not stuck and never returns an error.
func (a *ApplicationInstallerRecorder) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) {
	return false, nil
}

// Rollback is a no-op on the recorder: nothing is rolled back and the
// returned error is always nil.
func (a *ApplicationInstallerRecorder) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error {
	return nil
}

// ApplicationInstallerLogger is a fake ApplicationInstaller that just logs actions.
// It's used for the development of the controller.
type ApplicationInstallerLogger struct{}
Expand All @@ -81,6 +90,16 @@ func (a ApplicationInstallerLogger) Delete(ctx context.Context, log *zap.Sugared
return util.NoStatusUpdate, nil
}

// IsStuck is a no-op on the logging fake: it always answers "not stuck" with
// a nil error.
func (a ApplicationInstallerLogger) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) {
	return false, nil
}

// Rollback is a no-op on the logging fake: it performs no rollback and always
// returns nil.
func (a ApplicationInstallerLogger) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error {
	return nil
}

// CustomApplicationInstaller is an applicationInstaller in which every function can be independently mocked.
// If a function is not mocked, then default values are returned.
type CustomApplicationInstaller struct {
Expand Down Expand Up @@ -117,3 +136,13 @@ func (c CustomApplicationInstaller) Delete(ctx context.Context, log *zap.Sugared
}
return util.NoStatusUpdate, nil
}

// IsStuck has no mock hook in this implementation: it unconditionally returns
// false and a nil error.
func (c CustomApplicationInstaller) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) {
	return false, nil
}

// Rollback has no mock hook in this implementation: it does nothing and
// unconditionally returns nil.
func (c CustomApplicationInstaller) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error {
	return nil
}
20 changes: 20 additions & 0 deletions pkg/applications/helmclient/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,26 @@ func (h HelmClient) Uninstall(releaseName string) (*release.UninstallReleaseResp
return uninstallReleaseResponse, err
}

// GetMetadata wraps Helm's GetMetadata action so that it runs with our ActionConfig.
//
// It returns whatever metadata the action reports for releaseName. If the
// action fails, the result is nil and the underlying error is wrapped with the
// release name for context.
func (h HelmClient) GetMetadata(releaseName string) (*action.Metadata, error) {
	metadata, err := action.NewGetMetadata(h.actionConfig).Run(releaseName)
	if err != nil {
		// Error strings start lowercase so they read naturally once callers wrap them.
		return nil, fmt.Errorf("could not retrieve metadata for release %q: %w", releaseName, err)
	}
	return metadata, nil
}

// Rollback wraps Helm's Rollback action so that it runs with our ActionConfig.
//
// No options are set on the action before it is run, so Helm's defaults apply.
// If the action fails, the underlying error is wrapped with the release name
// for context.
func (h HelmClient) Rollback(releaseName string) error {
	if err := action.NewRollback(h.actionConfig).Run(releaseName); err != nil {
		// Error strings start lowercase so they read naturally once callers wrap them.
		return fmt.Errorf("could not rollback release %q: %w", releaseName, err)
	}
	return nil
}

// buildDependencies adds missing repositories and then does a Helm dependency build (i.e. download the chart dependencies
// from repositories into "charts" folder).
func (h HelmClient) buildDependencies(chartLoc string, auth AuthSettings) (*chart.Chart, error) {
Expand Down
26 changes: 26 additions & 0 deletions pkg/applications/installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ type ApplicationInstaller interface {

// Delete function uninstalls the application on the user-cluster and returns an error if the uninstallation has failed. StatusUpdater is guaranteed to be non nil. This is idempotent.
Delete(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (util.StatusUpdater, error)

// IsStuck determines if a release is stuck. Its main purpose is to detect inconsistent behavior in upstream Application libraries.
IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error)

// Rollback rolls an Application back to the previous release.
Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error
}

// ApplicationManager handles the installation / uninstallation of an Application on the user-cluster.
Expand Down Expand Up @@ -138,3 +144,23 @@ func (a *ApplicationManager) reconcileNamespace(ctx context.Context, log *zap.Su
}
return nil
}

// IsStuck reports whether the release behind applicationInstallation is stuck.
// Its main purpose is to detect inconsistent behavior in upstream Application libraries.
//
// The check itself is delegated to the template provider built for this
// installation; an error is returned if that provider cannot be initialized
// or if the provider's own check fails.
func (a *ApplicationManager) IsStuck(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) {
	provider, err := providers.NewTemplateProvider(ctx, seedClient, a.Kubeconfig, a.ApplicationCache, log, applicationInstallation, a.SecretNamespace)
	if err != nil {
		return false, fmt.Errorf("failed to initialize template provider: %w", err)
	}

	stuck, err := provider.IsStuck(applicationInstallation)
	return stuck, err
}

// Rollback rolls an Application back to the previous release.
//
// The rollback itself is delegated to the template provider built for this
// installation; an error is returned if that provider cannot be initialized
// or if the provider's rollback fails.
func (a *ApplicationManager) Rollback(ctx context.Context, log *zap.SugaredLogger, seedClient ctrlruntimeclient.Client, userClient ctrlruntimeclient.Client, applicationInstallation *appskubermaticv1.ApplicationInstallation) error {
	provider, initErr := providers.NewTemplateProvider(ctx, seedClient, a.Kubeconfig, a.ApplicationCache, log, applicationInstallation, a.SecretNamespace)
	if initErr != nil {
		return fmt.Errorf("failed to initialize template provider: %w", initErr)
	}

	return provider.Rollback(applicationInstallation)
}
73 changes: 73 additions & 0 deletions pkg/applications/providers/template/helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,76 @@ func getDeployOpts(appDefinition *appskubermaticv1.ApplicationDefinition, appIns
// Fallback to default options.
return helmclient.NewDeployOpts(false, 0, false, false)
}

// IsStuck aims to identify if a helm release is stuck. This targets an upstream issue in helm, which has not been resolved. For further details see:
// - https://github.com/helm/helm/issues/7476
// - https://github.com/helm/helm/issues/4558
func (h HelmTemplate) IsStuck(applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error) {
// if the release was successful, exit early
if applicationInstallation.Status.Conditions[appskubermaticv1.Ready].Status == "True" {
return false, nil
}
// currently we observe the stuck error exclusively with this message. If it does not exist, exit early
if applicationInstallation.Status.Conditions[appskubermaticv1.Ready].Message != "another operation (install/upgrade/rollback) is in progress" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I love helm
/s

return false, nil
}

helmCacheDir, err := util.CreateHelmTempDir(h.CacheDir)
if err != nil {
return false, fmt.Errorf("failed to create helmCacheDir: %w", err)
}

defer util.CleanUpHelmTempDir(helmCacheDir, h.Log)
restClientGetter := &genericclioptions.ConfigFlags{
KubeConfig: &h.Kubeconfig,
Namespace: &applicationInstallation.Spec.Namespace.Name,
}
helmClient, err := helmclient.NewClient(
h.Ctx,
restClientGetter,
helmclient.NewSettings(helmCacheDir),
applicationInstallation.Spec.Namespace.Name,
h.Log)
if err != nil {
return false, fmt.Errorf("failed to create helmClient: %w", err)
}

// retrieve metadata of the latest release
releaseName := getReleaseName(applicationInstallation)
metadata, err := helmClient.GetMetadata(releaseName)
if err != nil {
return false, fmt.Errorf("failed to retrieve metadata for checking if release %q is stuck: %w", releaseName, err)
}

// if the status of the release is not still pending, exit early
if metadata.Status != "pending-upgrade" {
return false, nil
}

return true, nil
}

// Rollback rolls an Application back to the previous release.
//
// It creates a temporary helm cache directory (cleaned up on return), builds a
// helm client scoped to the namespace from the ApplicationInstallation spec, and
// asks that client to roll back the release named by getReleaseName. An error is
// returned if the cache directory or the client cannot be created, or if the
// rollback itself fails.
func (h HelmTemplate) Rollback(applicationInstallation *appskubermaticv1.ApplicationInstallation) error {
	cacheDir, err := util.CreateHelmTempDir(h.CacheDir)
	if err != nil {
		return fmt.Errorf("failed to create helmCacheDir: %w", err)
	}
	defer util.CleanUpHelmTempDir(cacheDir, h.Log)

	client, err := helmclient.NewClient(
		h.Ctx,
		&genericclioptions.ConfigFlags{
			KubeConfig: &h.Kubeconfig,
			Namespace:  &applicationInstallation.Spec.Namespace.Name,
		},
		helmclient.NewSettings(cacheDir),
		applicationInstallation.Spec.Namespace.Name,
		h.Log,
	)
	if err != nil {
		return fmt.Errorf("failed to create helmClient: %w", err)
	}

	releaseName := getReleaseName(applicationInstallation)
	return client.Rollback(releaseName)
}
6 changes: 6 additions & 0 deletions pkg/applications/providers/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ type TemplateProvider interface {

// Uninstall the application.
Uninstall(applicationInstallation *appskubermaticv1.ApplicationInstallation) (util.StatusUpdater, error)

// IsStuck checks if a release is stuck.
IsStuck(applicationInstallation *appskubermaticv1.ApplicationInstallation) (bool, error)

// Rollback rolls the Application back to the previous release.
Rollback(applicationInstallation *appskubermaticv1.ApplicationInstallation) error
}

// NewTemplateProvider return the concrete implementation of TemplateProvider according to the templateMethod.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,20 @@ func (r *reconciler) handleInstallation(ctx context.Context, log *zap.SugaredLog
return nil
}

// Because some upstream tools are not completely idempotent, we need a check to make sure a release is not stuck.
// This should be run before we make any changes to the status field, so we can use it in our analysis
stuck, err := r.appInstaller.IsStuck(ctx, log, r.seedClient, r.userClient, appInstallation)
if err != nil {
return fmt.Errorf("failed to check if the previous release is stuck: %w", err)
}
if stuck {
log.Infof("Release for ApplicationInstallation seems to be stuck, attempting rollback now")
if err := r.appInstaller.Rollback(ctx, log, r.seedClient, r.userClient, appInstallation); err != nil {
return fmt.Errorf("failed to rollback release: %w", err)
}
log.Infof("Release for ApplicationInstallation has been rolled back successfully")
}

downloadDest, err := os.MkdirTemp(r.appInstaller.GetAppCache(), appInstallation.Namespace+"-"+appInstallation.Name)
if err != nil {
return fmt.Errorf("failed to create temporary directory where application source will be downloaded: %w", err)
Expand Down