Skip to content

Commit

Permalink
destroy: provide a way to stop all uninstalls using context
Browse files Browse the repository at this point in the history
The Run function in the Destroyer interface was modified to take a context as
a parameter. This provides a way for the user to stop the uninstall after a
period of time by providing a context with a deadline.

A "--timeout" flag was added to the `openshift-install destroy cluster` command.
This allows the user to time out the destroy after a specified number of seconds.

The baremetal, libvirt, openstack, and ovirt providers do not provide a means
by which most requests made to the provider can be stopped prematurely. In these
cases, the context is checked prior to making the requests as a best effort.
But the uninstall may continue for a period of time after the context is done.

The RunWithContext function introduced in openshift#3765
for AWS has been obsoleted since the Run function now accepts a context.

This will be used by Hive to back off uninstall attempts.

https://issues.redhat.com/browse/CO-974
  • Loading branch information
staebler committed Jun 24, 2020
1 parent 06b27c6 commit cf24780
Show file tree
Hide file tree
Showing 13 changed files with 299 additions and 76 deletions.
21 changes: 17 additions & 4 deletions cmd/openshift-install/destroy.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package main

import (
"context"
"os"
"path/filepath"
"time"

"github.com/pkg/errors"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -38,29 +40,40 @@ func newDestroyCmd() *cobra.Command {
}

func newDestroyClusterCmd() *cobra.Command {
return &cobra.Command{
var timeoutSeconds int
cmd := &cobra.Command{
Use: "cluster",
Short: "Destroy an OpenShift cluster",
Args: cobra.ExactArgs(0),
Run: func(_ *cobra.Command, _ []string) {
cleanup := setupFileHook(rootOpts.dir)
defer cleanup()

err := runDestroyCmd(rootOpts.dir)
err := runDestroyCmd(rootOpts.dir, timeoutSeconds)
if err != nil {
logrus.Fatal(err)
}
},
}
cmd.Flags().IntVar(&timeoutSeconds, "timeout", 0, "number of seconds to run before giving up; defaults to running indefinitely")
return cmd
}

func runDestroyCmd(directory string) error {
func runDestroyCmd(directory string, timeoutSeconds int) error {
timer.StartTimer(timer.TotalTimeElapsed)

ctx := context.Background()
if timeoutSeconds > 0 {
ctxWithTimeout, cancel := context.WithTimeout(ctx, time.Duration(timeoutSeconds) * time.Second)
defer cancel()
ctx = ctxWithTimeout
}

destroyer, err := destroy.New(logrus.StandardLogger(), directory)
if err != nil {
return errors.Wrap(err, "Failed while preparing to destroy cluster")
}
if err := destroyer.Run(); err != nil {
if err := destroyer.Run(ctx); err != nil {
return errors.Wrap(err, "Failed to destroy cluster")
}

Expand Down
8 changes: 1 addition & 7 deletions pkg/destroy/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,7 @@ func (o *ClusterUninstaller) validate() error {
}

// Run is the entrypoint to start the uninstall process
func (o *ClusterUninstaller) Run() error {
return o.RunWithContext(context.Background())
}

// RunWithContext runs the uninstall process with a context.
// The first return is the list of ARNs for resources that could not be destroyed.
func (o *ClusterUninstaller) RunWithContext(ctx context.Context) error {
func (o *ClusterUninstaller) Run(ctx context.Context) error {
err := o.validate()
if err != nil {
return err
Expand Down
8 changes: 4 additions & 4 deletions pkg/destroy/azure/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,21 +90,21 @@ func New(logger logrus.FieldLogger, metadata *types.ClusterMetadata) (providers.
}

// Run is the entrypoint to start the uninstall process.
func (o *ClusterUninstaller) Run() error {
func (o *ClusterUninstaller) Run(ctx context.Context) error {
o.configureClients()
group := o.InfraID + "-rg"
o.Logger.Debug("deleting public records")
if err := deletePublicRecords(context.TODO(), o.zonesClient, o.recordsClient, o.privateZonesClient, o.privateRecordSetsClient, o.Logger, group); err != nil {
if err := deletePublicRecords(ctx, o.zonesClient, o.recordsClient, o.privateZonesClient, o.privateRecordSetsClient, o.Logger, group); err != nil {
o.Logger.Debug(err)
return errors.Wrap(err, "failed to delete public DNS records")
}
o.Logger.Debug("deleting resource group")
if err := deleteResourceGroup(context.TODO(), o.resourceGroupsClient, o.Logger, group); err != nil {
if err := deleteResourceGroup(ctx, o.resourceGroupsClient, o.Logger, group); err != nil {
o.Logger.Debug(err)
return errors.Wrap(err, "failed to delete resource group")
}
o.Logger.Debug("deleting application registrations")
if err := deleteApplicationRegistrations(context.TODO(), o.applicationsClient, o.serviceprincipalsClient, o.Logger, o.InfraID); err != nil {
if err := deleteApplicationRegistrations(ctx, o.applicationsClient, o.serviceprincipalsClient, o.Logger, o.InfraID); err != nil {
o.Logger.Debug(err)
return errors.Wrap(err, "failed to delete application registrations and their service principals")
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/destroy/baremetal/baremetal.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
package baremetal

import (
"context"

"github.com/libvirt/libvirt-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
Expand All @@ -19,7 +21,7 @@ type ClusterUninstaller struct {
}

// Run is the entrypoint to start the uninstall process.
func (o *ClusterUninstaller) Run() error {
func (o *ClusterUninstaller) Run(ctx context.Context) error {
o.Logger.Debug("Deleting bare metal resources")

// FIXME: close the connection
Expand Down
15 changes: 15 additions & 0 deletions pkg/destroy/context/context.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package context

import (
"context"
)

// IsContextDone reports whether ctx has already finished, either because it
// was cancelled or because its deadline passed. The check never blocks: a
// context that is still live yields false immediately.
func IsContextDone(ctx context.Context) bool {
	// Err is nil until the Done channel is closed and non-nil afterwards, so
	// it answers the same question as a non-blocking receive on Done.
	return ctx.Err() != nil
}
6 changes: 6 additions & 0 deletions pkg/destroy/context/context_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package context

import "testing"

// TestIsContextDone is, judging by its name, meant to cover IsContextDone.
// NOTE(review): the body is empty, so the test passes without calling
// anything. Cases for a live context, a cancelled context, and a context
// whose deadline has passed still need to be written.
func TestIsContextDone(t *testing.T) {
}
5 changes: 2 additions & 3 deletions pkg/destroy/gcp/gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,8 @@ func New(logger logrus.FieldLogger, metadata *types.ClusterMetadata) (providers.
}

// Run is the entrypoint to start the uninstall process
func (o *ClusterUninstaller) Run() error {
ctx, cancel := o.contextWithTimeout()
defer cancel()
func (o *ClusterUninstaller) Run(ctx context.Context) error {
o.Context = ctx

ssn, err := gcpconfig.GetSession(ctx)
if err != nil {
Expand Down
39 changes: 32 additions & 7 deletions pkg/destroy/libvirt/libvirt.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package libvirt

import (
"context"
"strings"

libvirt "github.com/libvirt/libvirt-go"
Expand Down Expand Up @@ -38,7 +39,7 @@ var AlwaysTrueFilter = func() filterFunc {
}

// deleteFunc is the signature a function needs to implement in order to delete resources.
type deleteFunc func(conn *libvirt.Connect, filter filterFunc, logger logrus.FieldLogger) error
type deleteFunc func(ctx context.Context, conn *libvirt.Connect, filter filterFunc, logger logrus.FieldLogger) error

// ClusterUninstaller holds the various options for the cluster we want to delete.
type ClusterUninstaller struct {
Expand All @@ -57,7 +58,7 @@ func New(logger logrus.FieldLogger, metadata *types.ClusterMetadata) (providers.
}

// Run is the entrypoint to start the uninstall process.
func (o *ClusterUninstaller) Run() error {
func (o *ClusterUninstaller) Run(ctx context.Context) error {
conn, err := libvirt.NewConnect(o.LibvirtURI)
if err != nil {
return errors.Wrap(err, "failed to connect to Libvirt daemon")
Expand All @@ -68,7 +69,7 @@ func (o *ClusterUninstaller) Run() error {
deleteNetwork,
deleteStoragePool,
} {
err = del(conn, o.Filter, o.Logger)
err = del(ctx, conn, o.Filter, o.Logger)
if err != nil {
return err
}
Expand All @@ -82,11 +83,14 @@ func (o *ClusterUninstaller) Run() error {
// additional nodes after the initial list call. We continue deleting
// domains until we either hit an error or we have a list call with no
// matching domains.
func deleteDomains(conn *libvirt.Connect, filter filterFunc, logger logrus.FieldLogger) error {
func deleteDomains(ctx context.Context, conn *libvirt.Connect, filter filterFunc, logger logrus.FieldLogger) error {
logger.Debug("Deleting libvirt domains")
var err error
nothingToDelete := false
for !nothingToDelete {
if err := ctx.Err(); err != nil {
return err
}
nothingToDelete, err = deleteDomainsSinglePass(conn, filter, logger)
if err != nil {
return err
Expand All @@ -95,7 +99,7 @@ func deleteDomains(conn *libvirt.Connect, filter filterFunc, logger logrus.Field
return nil
}

func deleteDomainsSinglePass(conn *libvirt.Connect, filter filterFunc, logger logrus.FieldLogger) (nothingToDelete bool, err error) {
func deleteDomainsSinglePass(ctx context.Context, conn *libvirt.Connect, filter filterFunc, logger logrus.FieldLogger) (nothingToDelete bool, err error) {
domains, err := conn.ListAllDomains(0)
if err != nil {
return false, errors.Wrap(err, "list domains")
Expand All @@ -113,6 +117,11 @@ func deleteDomainsSinglePass(conn *libvirt.Connect, filter filterFunc, logger lo
}

nothingToDelete = false

if ctx.Err() != nil {
continue
}

dState, _, err := domain.GetState()
if err != nil {
return false, errors.Wrapf(err, "get domain state %d", dName)
Expand All @@ -132,7 +141,11 @@ func deleteDomainsSinglePass(conn *libvirt.Connect, filter filterFunc, logger lo
return nothingToDelete, nil
}

func deleteStoragePool(conn *libvirt.Connect, filter filterFunc, logger logrus.FieldLogger) error {
func deleteStoragePool(ctx context.Context, conn *libvirt.Connect, filter filterFunc, logger logrus.FieldLogger) error {
if err := ctx.Err(); err != nil {
return err
}

logger.Debug("Deleting libvirt volumes")

pools, err := conn.ListStoragePools()
Expand All @@ -141,6 +154,10 @@ func deleteStoragePool(conn *libvirt.Connect, filter filterFunc, logger logrus.F
}

for _, pname := range pools {
if err := ctx.Err(); err != nil {
return err
}

// pool name that returns true from filter
if !filter(pname) {
continue
Expand Down Expand Up @@ -188,7 +205,11 @@ func deleteStoragePool(conn *libvirt.Connect, filter filterFunc, logger logrus.F
return nil
}

func deleteNetwork(conn *libvirt.Connect, filter filterFunc, logger logrus.FieldLogger) error {
func deleteNetwork(ctx context.Context, conn *libvirt.Connect, filter filterFunc, logger logrus.FieldLogger) error {
if err := ctx.Err(); err != nil {
return err
}

logger.Debug("Deleting libvirt network")

networks, err := conn.ListNetworks()
Expand All @@ -197,6 +218,10 @@ func deleteNetwork(conn *libvirt.Connect, filter filterFunc, logger logrus.Field
}

for _, nName := range networks {
if err := ctx.Err(); err != nil {
return err
}

if !filter(nName) {
continue
}
Expand Down

0 comments on commit cf24780

Please sign in to comment.