Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ coverage.*
profile.cov
*hub-bundle*.tar.gz
*spoke-bundle*.tar.gz
*fleetconfig-support-bundle/

# Dependency directories (remove the comment below to include it)
# vendor/
Expand All @@ -40,6 +41,7 @@ go.work.sum
venv/
*__pycache__/
requirements.txt
.DS_Store

# Temp files
tmp/
tmp/
14 changes: 13 additions & 1 deletion fleetconfig-controller/internal/controller/addon.go
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,19 @@ func handleAddonDisable(ctx context.Context, spokeName string, addons []string)
stdout, stderr, err := exec_utils.CmdWithLogs(ctx, cmd, "waiting for 'clusteradm addon disable' to complete...")
if err != nil {
out := append(stdout, stderr...)
return fmt.Errorf("failed to disable addons: %v, output: %s", err, string(out))
outStr := string(out)

// Check if the error is due to addon not being found or cluster not found - these are success cases
if strings.Contains(outStr, "add-on not found") {
logger.V(5).Info("addon already disabled (not found)", "managedcluster", spokeName, "addons", addons, "output", outStr)
return nil
}
if strings.Contains(outStr, "managedclusters.cluster.open-cluster-management.io") && strings.Contains(outStr, "not found") {
logger.V(5).Info("addon disable skipped (cluster not found)", "managedcluster", spokeName, "addons", addons, "output", outStr)
return nil
}

return fmt.Errorf("failed to disable addons: %v, output: %s", err, outStr)
}
logger.V(1).Info("disabled addons", "managedcluster", spokeName, "addons", addons, "output", string(stdout))
return nil
Expand Down
66 changes: 58 additions & 8 deletions fleetconfig-controller/internal/controller/spoke.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@ import (
"regexp"
"slices"
"strings"
"time"

certificatesv1 "k8s.io/api/certificates/v1"
corev1 "k8s.io/api/core/v1"
kerrs "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
workapi "open-cluster-management.io/api/client/work/clientset/versioned"
clusterv1 "open-cluster-management.io/api/cluster/v1"
operatorv1 "open-cluster-management.io/api/operator/v1"
workv1 "open-cluster-management.io/api/work/v1"
Expand All @@ -32,8 +35,10 @@ import (
var csrSuffixPattern = regexp.MustCompile(`-[a-zA-Z0-9]{5}$`)

const (
amwExistsError = "you should manually clean them, uninstall kluster will cause those works out of control."
managedClusterAddOn = "ManagedClusterAddOn"
amwExistsError = "you should manually clean them, uninstall kluster will cause those works out of control."
managedClusterAddOn = "ManagedClusterAddOn"
addonCleanupTimeout = 1 * time.Minute
addonCleanupPollInterval = 2 * time.Second
)

// handleSpokes manages Spoke cluster join and upgrade operations
Expand Down Expand Up @@ -575,6 +580,7 @@ func deregisterSpoke(ctx context.Context, kClient client.Client, hubKubeconfig [
if err != nil {
return err
}

// skip clean up if the ManagedCluster resource is not found or if any manifestWorks exist
managedCluster, err := clusterC.ClusterV1().ManagedClusters().Get(ctx, spoke.Name, metav1.GetOptions{})
if kerrs.IsNotFound(err) {
Expand All @@ -599,16 +605,25 @@ func deregisterSpoke(ctx context.Context, kClient client.Client, hubKubeconfig [
// remove addons only after confirming that the cluster can be unjoined - this avoids leaving dangling resources that may rely on the addon
if err := handleAddonDisable(ctx, spoke.Name, spoke.EnabledAddons); err != nil {
fc.SetConditions(true, v1alpha1.NewCondition(
"AddonsDisabled", spoke.AddonDisableType(), metav1.ConditionFalse, metav1.ConditionTrue,
err.Error(), spoke.AddonDisableType(), metav1.ConditionFalse, metav1.ConditionTrue,
))
return err
}

if len(spoke.EnabledAddons) > 0 {
// Wait for addon manifestWorks to be fully cleaned up before proceeding with unjoin
if err := waitForAddonManifestWorksCleanup(ctx, workC, spoke.Name, addonCleanupTimeout); err != nil {
fc.SetConditions(true, v1alpha1.NewCondition(
err.Error(), spoke.AddonDisableType(), metav1.ConditionFalse, metav1.ConditionTrue,
))
return fmt.Errorf("addon manifestWorks cleanup failed: %w", err)
}
fc.SetConditions(true, v1alpha1.NewCondition(
"AddonsDisabled", spoke.AddonDisableType(), metav1.ConditionTrue, metav1.ConditionTrue,
))
}
// unjoin spoke

// unjoin spoke - safe to proceed now that addon cleanup is confirmed
if err := unjoinSpoke(ctx, kClient, fc, spoke); err != nil {
return err
}
Expand All @@ -628,14 +643,14 @@ func deregisterSpoke(ctx context.Context, kClient client.Client, hubKubeconfig [
}

// remove ManagedCluster
if err = clusterC.ClusterV1().ManagedClusters().Delete(ctx, spoke.Name, metav1.DeleteOptions{}); err != nil && !kerrs.IsNotFound(err) {
return err
if err = clusterC.ClusterV1().ManagedClusters().Delete(ctx, spoke.Name, metav1.DeleteOptions{}); err != nil {
return client.IgnoreNotFound(err)
}

// remove Namespace
ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: spoke.Name}}
if err := kClient.Delete(ctx, ns); err != nil && !kerrs.IsNotFound(err) {
return err
if err := kClient.Delete(ctx, ns); err != nil {
return client.IgnoreNotFound(err)
}

return nil
Expand All @@ -652,6 +667,41 @@ func allOwnersAddOns(mws []workv1.ManifestWork) bool {
return true
}

// waitForAddonManifestWorksCleanup polls until the spoke's namespace contains no
// ManifestWorks, confirming that addon-related works were removed after the addon
// disable operation. This avoids a race where spoke unjoin proceeds while works
// that depend on the addons still exist.
//
// NOTE(review): the list is unfiltered, so this actually waits for ALL
// ManifestWorks in the namespace to disappear, not only addon-owned ones —
// confirm that is intended (the caller only reaches this point when remaining
// works are addon-owned).
//
// Returns nil once no ManifestWorks remain, or an error wrapping the poll
// failure (context.DeadlineExceeded on timeout, or the caller's context error
// on cancellation).
func waitForAddonManifestWorksCleanup(ctx context.Context, workC *workapi.Clientset, spokeName string, timeout time.Duration) error {
	logger := log.FromContext(ctx)
	logger.V(1).Info("waiting for addon manifestWorks cleanup", "spokeName", spokeName, "timeout", timeout)

	err := wait.PollUntilContextTimeout(ctx, addonCleanupPollInterval, timeout, true, func(ctx context.Context) (bool, error) {
		manifestWorks, err := workC.WorkV1().ManifestWorks(spokeName).List(ctx, metav1.ListOptions{})
		if err != nil {
			// Transient list failures should not abort the wait; log and keep polling.
			logger.V(3).Info("failed to list manifestWorks during cleanup wait", "error", err)
			return false, nil
		}

		// Success condition: no manifestWorks remaining.
		if len(manifestWorks.Items) == 0 {
			logger.V(1).Info("addon manifestWorks cleanup completed", "spokeName", spokeName)
			return true, nil
		}

		logger.V(3).Info("waiting for addon manifestWorks cleanup",
			"spokeName", spokeName,
			"addonManifestWorks", len(manifestWorks.Items))

		// Continue polling.
		return false, nil
	})
	if err != nil {
		// err may be context.DeadlineExceeded (timeout) or the caller's context
		// cancellation — wrap it rather than asserting "timeout" unconditionally.
		return fmt.Errorf("waiting for addon manifestWorks cleanup for spoke %s: %w", spokeName, err)
	}

	return nil
}

// prepareKlusterletValuesFile creates a temporary file with klusterlet values and returns
// args to append and a cleanup function. Returns empty slice if values are empty.
func prepareKlusterletValuesFile(values *v1alpha1.KlusterletChartConfig) ([]string, func(), error) {
Expand Down
Loading