diff --git a/cleanup/ad-guid-README.md b/cleanup/ad-guid-README.md index 102b1b7f196..d04ede95601 100644 --- a/cleanup/ad-guid-README.md +++ b/cleanup/ad-guid-README.md @@ -2,6 +2,13 @@ **It is recommended to take a snapshot of Rancher before performing this in the event that a restore is required.** + +## Critical Notes +* This script will delete and recreate CRTBs/PRTBs/GRBs, which may cause issues with tools (like Terraform) that maintain external state. The original object names are stored in the "ad-guid-previous-name" label on the new objects. +* It is recommended to use this script on Rancher v2.7.6; running this on v2.7.5 may produce performance issues. +* This script requires that the Active Directory service account has permissions to read all users known to Rancher. + + ## Purpose In order to reverse the effects of migrating Active Directory principalIDs to be based on GUID rather than DN this @@ -16,7 +23,7 @@ This utility will: This utility will go through all Rancher users and perform an Active Directory lookup using the configured service account to get the user's distinguished name. Next, it will perform lookups inside Rancher for all the user's Tokens, -ClusterRoleTemplateBindings, and ProjectRoleTemplateBindings. If any of those objects, including the user object +ClusterRoleTemplateBindings, ProjectRoleTemplateBindings, and GlobalRoleBindings. If any of those objects, including the user object itself are referencing a principalID based on the GUID of that user, those objects will be updated to reference the distinguished name-based principalID (unless the utility is run with -dry-run, in that case the only results are log messages indicating the changes that would be made by a run without that flag). @@ -52,7 +59,7 @@ Active Directory is not the authentication provider, this utility will take no a immediately exit. In order to allow it to run again, you can either edit the configmap to remove that key or you can delete the configmap entirely. 
-* When migrating ClusterRoleTemplateBindings and ProjectRoleTemplateBindings, it is necessary to perform the action +* When migrating ClusterRoleTemplateBindings, ProjectRoleTemplateBindings, and GlobalRoleBindings, it is necessary to perform the action as a delete/create rather than an update. **This may cause issues if you use tooling that relies on the names of the objects**. When a ClusterRoleTemplateBinding or a ProjectRoleTemplateBinding is migrated to a new name, the newly created object will contain a label, "ad-guid-previous-name", that will have a value of the name of the object that was deleted. diff --git a/cleanup/ad-guid-unmigration.sh b/cleanup/ad-guid-unmigration.sh index 485a88d7593..55bd4b94a57 100755 --- a/cleanup/ad-guid-unmigration.sh +++ b/cleanup/ad-guid-unmigration.sh @@ -20,6 +20,8 @@ It is also important to note that migration of ClusterRoleTemplateBindings and P a delete/create operation rather than an update. This will result in new object names for the migrated bindings. A label with the former object name will be included in the migrated bindings. +The Rancher Agent image to be used with this utility can be found at rancher/rancher-agent:v2.7.6 + It is recommended that you perform a Rancher backup prior to running this utility." CLEAR='\033[0m' @@ -132,6 +134,14 @@ display_banner() { echo "$border" printf "%-${text_width}s \n" "$text" echo "$border" + echo "Dry run: $dry_run" + echo "Delete missing: $delete_missing" + echo "Agent image: $agent_image" + if [[ "$dry_run" = true ]] && [[ "$delete_missing" = true ]] + then + echo "Setting the dry-run option to true overrides the delete-missing option. NO CHANGES WILL BE MADE." + fi + echo "$border" } OPTS=$(getopt -o hnd -l help,dry-run,delete-missing -- "$@") @@ -196,6 +206,7 @@ then fi fi + read -p "Do you want to continue? (y/n): " choice if [[ ! $choice =~ ^[Yy]$ ]]; then echo "Exiting..." 
diff --git a/pkg/agent/clean/ad_unmigration/ldap.go b/pkg/agent/clean/ad_unmigration/ldap.go index 5dbd47e1e97..5497831825c 100644 --- a/pkg/agent/clean/ad_unmigration/ldap.go +++ b/pkg/agent/clean/ad_unmigration/ldap.go @@ -3,6 +3,7 @@ package ad_unmigration import ( "bytes" "crypto/x509" + "encoding/json" "fmt" "os" "regexp" @@ -10,25 +11,26 @@ import ( "time" ldapv3 "github.com/go-ldap/ldap/v3" - "github.com/mitchellh/mapstructure" "github.com/pkg/errors" - v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" - "github.com/rancher/rancher/pkg/auth/providers/common" - "github.com/rancher/rancher/pkg/auth/providers/common/ldap" - v3client "github.com/rancher/rancher/pkg/client/generated/management/v3" - "github.com/rancher/rancher/pkg/types/config" "github.com/sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/wait" restclient "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + + v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" + "github.com/rancher/rancher/pkg/auth/providers/common" + "github.com/rancher/rancher/pkg/auth/providers/common/ldap" + v3client "github.com/rancher/rancher/pkg/client/generated/management/v3" + "github.com/rancher/rancher/pkg/types/config" ) // Rancher 2.7.5 serialized binary GUIDs from LDAP using this pattern, so this // is what we should match. Notably this differs from Active Directory GUID // strings, which have dashes and braces as delimiters. 
-var validRancherGuidPattern = regexp.MustCompile("^[0-9a-f]+$") +var validRancherGUIDPattern = regexp.MustCompile("^[0-9a-f]+$") type LdapErrorNotFound struct{} @@ -169,7 +171,7 @@ func adConfiguration(sc *config.ScaledContext) (*v3.ActiveDirectoryConfig, error storedADConfigMap := u.UnstructuredContent() storedADConfig := &v3.ActiveDirectoryConfig{} - err = mapstructure.Decode(storedADConfigMap, storedADConfig) + err = common.Decode(storedADConfigMap, storedADConfig) if err != nil { logrus.Debugf("[%v] errors while decoding stored AD config: %v", migrateAdUserOperation, err) } @@ -180,7 +182,7 @@ func adConfiguration(sc *config.ScaledContext) (*v3.ActiveDirectoryConfig, error } typemeta := &metav1.ObjectMeta{} - err = mapstructure.Decode(metadataMap, typemeta) + err = common.Decode(metadataMap, typemeta) if err != nil { logrus.Debugf("[%v] errors while decoding typemeta: %v", migrateAdUserOperation, err) } @@ -243,5 +245,44 @@ func isGUID(principalID string) bool { logrus.Errorf("[%v] failed to parse invalid PrincipalID: %v", identifyAdUserOperation, principalID) return false } - return validRancherGuidPattern.MatchString(parts[1]) + return validRancherGUIDPattern.MatchString(parts[1]) +} + +func updateADConfigMigrationStatus(status map[string]string, sc *config.ScaledContext) error { + authConfigObj, err := sc.Management.AuthConfigs("").ObjectClient().UnstructuredClient().Get("activedirectory", metav1.GetOptions{}) + if err != nil { + logrus.Errorf("[%v] failed to obtain activedirecotry authConfigObj: %v", migrateAdUserOperation, err) + return err + } + + authConfigJSON, err := json.Marshal(authConfigObj) + if err != nil { + return fmt.Errorf("failed to marshal authConfig object to JSON: %v", err) + } + + // Create an empty unstructured object to hold the decoded JSON + storedADConfig := &unstructured.Unstructured{} + + // Decode the JSON string into the unstructured object because mapstructure is dropping the metadata + if err := json.Unmarshal(authConfigJSON, 
storedADConfig); err != nil { + return fmt.Errorf("failed to unmarshal JSON into storedADConfig: %v", err) + } + + // Update annotations with migration status + annotations := storedADConfig.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + for annotation, value := range status { + annotations[adGUIDMigrationPrefix+annotation] = value + } + storedADConfig.SetAnnotations(annotations) + + // Update the AuthConfig object using the unstructured client + _, err = sc.Management.AuthConfigs("").ObjectClient().UnstructuredClient().Update(storedADConfig.GetName(), storedADConfig) + if err != nil { + return fmt.Errorf("failed to update authConfig object: %v", err) + } + + return nil } diff --git a/pkg/agent/clean/ad_unmigration/migrate.go b/pkg/agent/clean/ad_unmigration/migrate.go index dce78f9bdaf..44b0636f3b8 100644 --- a/pkg/agent/clean/ad_unmigration/migrate.go +++ b/pkg/agent/clean/ad_unmigration/migrate.go @@ -8,18 +8,20 @@ package ad_unmigration import ( "context" "fmt" + "time" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" ldapv3 "github.com/go-ldap/ldap/v3" "github.com/pkg/errors" - v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" - "github.com/rancher/rancher/pkg/auth/providers/activedirectory" - "github.com/rancher/rancher/pkg/types/config" "github.com/sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" restclient "k8s.io/client-go/rest" + + v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" + "github.com/rancher/rancher/pkg/auth/providers/activedirectory" + "github.com/rancher/rancher/pkg/types/config" ) const ( @@ -33,6 +35,7 @@ const ( localPrefix = "local://" adGUIDMigrationLabel = "ad-guid-migration" adGUIDMigrationAnnotation = "ad-guid-migration-data" + adGUIDMigrationPrefix = "migration-" migratedLabelValue = "migrated" migrationPreviousName = "ad-guid-previous-name" AttributeObjectClass = "objectClass" @@ -40,6 +43,7 @@ const ( migrateStatusSkipped 
= "skippedUsers" migrateStatusMissing = "missingUsers" migrationStatusPercentage = "percentDone" + migrationStatusLastUpdate = "statusLastUpdated" ) type migrateUserWorkUnit struct { @@ -98,7 +102,14 @@ func UnmigrateAdGUIDUsersOnce(sc *config.ScaledContext) error { case activedirectory.StatusMigrationFinished: logrus.Debugf("[%v] ad-guid migration has already been completed, refusing to run again at startup", migrateAdUserOperation) return nil + case activedirectory.StatusMigrationFinishedWithMissing: + logrus.Infof("[%v] ad-guid migration has already been completed. To clean-up missing users, you can run the utility manually", migrateAdUserOperation) + return nil + case activedirectory.StatusMigrationFinishedWithSkipped: + logrus.Infof("[%v] ad-guid migration has already been completed. To try and resolve skipped users, you can run the utility manually", migrateAdUserOperation) + return nil } + } return UnmigrateAdGUIDUsers(&sc.RESTConfig, false, false) } @@ -135,7 +146,10 @@ func UnmigrateAdGUIDUsers(clientConfig *restclient.Config, dryRun bool, deleteMi } defer lConn.Close() + // set the status to running and reset the unmigrated fields err = updateMigrationStatus(sc, activedirectory.StatusMigrationField, activedirectory.StatusMigrationRunning) + updateUnmigratedUsers("", migrateStatusSkipped, true, sc) + updateUnmigratedUsers("", migrateStatusMissing, true, sc) if err != nil { return fmt.Errorf("unable to update migration status configmap: %v", err) } @@ -165,19 +179,19 @@ func UnmigrateAdGUIDUsers(clientConfig *restclient.Config, dryRun bool, deleteMi for _, user := range skippedUsers { logrus.Errorf("[%v] unable to migrate user '%v' due to a connection failure; this user will be skipped", migrateAdUserOperation, user.originalUser.Name) - updateUnmigratedUsers(user.originalUser.Name, migrateStatusSkipped, sc) + updateUnmigratedUsers(user.originalUser.Name, migrateStatusSkipped, false, sc) } for _, missingUser := range missingUsers { if deleteMissingUsers && 
!dryRun { logrus.Infof("[%v] user '%v' with GUID '%v' does not seem to exist in Active Directory. deleteMissingUsers is true, proceeding to delete this user permanently", migrateAdUserOperation, missingUser.originalUser.Name, missingUser.guid) - updateUnmigratedUsers(missingUser.originalUser.Name, migrateStatusMissing, sc) + updateUnmigratedUsers(missingUser.originalUser.Name, migrateStatusMissing, false, sc) err = sc.Management.Users("").Delete(missingUser.originalUser.Name, &metav1.DeleteOptions{}) if err != nil { logrus.Errorf("[%v] failed to delete missing user '%v' with: %v", migrateAdUserOperation, missingUser.originalUser.Name, err) } } else { logrus.Errorf("[%v] User '%v' with GUID '%v' does not seem to exist in Active Directory. this user will be skipped", migrateAdUserOperation, missingUser.originalUser.Name, missingUser.guid) - updateUnmigratedUsers(missingUser.originalUser.Name, migrateStatusMissing, sc) + updateUnmigratedUsers(missingUser.originalUser.Name, migrateStatusMissing, false, sc) } } @@ -223,7 +237,17 @@ func UnmigrateAdGUIDUsers(clientConfig *restclient.Config, dryRun bool, deleteMi } } - err = updateMigrationStatus(sc, activedirectory.StatusMigrationField, activedirectory.StatusMigrationFinished) + // If we have skipped users, that status will be reported as the overall status + // since that state is potentially resolvable by re-running the utility + var status string + if len(skippedUsers) > 0 { + status = activedirectory.StatusMigrationFinishedWithSkipped + } else if len(missingUsers) > 0 { + status = activedirectory.StatusMigrationFinishedWithMissing + } else { + status = activedirectory.StatusMigrationFinished + } + err = updateMigrationStatus(sc, activedirectory.StatusMigrationField, status) if err != nil { return fmt.Errorf("unable to update migration status configmap: %v", err) } @@ -394,6 +418,7 @@ func updateMigrationStatus(sc *config.ScaledContext, status string, value string cm.Data = map[string]string{} } cm.Data[status] = 
value + cm.Data[migrationStatusLastUpdate] = metav1.Now().Format(time.RFC3339) if _, err := sc.Core.ConfigMaps(activedirectory.StatusConfigMapNamespace).Update(cm); err != nil { // If the ConfigMap does not exist, create it @@ -404,27 +429,41 @@ func updateMigrationStatus(sc *config.ScaledContext, status string, value string } } } - + err = updateADConfigMigrationStatus(cm.Data, sc) + if err != nil { + return fmt.Errorf("unable to update AuthConfig status: %v", err) + } return nil } -// updateUnmigratedUsers will add a user to the list for the specified migration status in the migration status configmap -func updateUnmigratedUsers(user string, status string, sc *config.ScaledContext) { +// updateUnmigratedUsers will add a user to the list for the specified migration status in the migration status configmap. +// If reset is set to true, it will empty the list. +func updateUnmigratedUsers(user string, status string, reset bool, sc *config.ScaledContext) { cm, err := sc.Core.ConfigMaps(activedirectory.StatusConfigMapNamespace).Get(activedirectory.StatusConfigMapName, metav1.GetOptions{}) if err != nil { logrus.Errorf("[%v] unable to fetch configmap to update %v users: %v", migrateAdUserOperation, status, err) } - currentList := cm.Data[status] - if currentList == "" { - currentList = currentList + user + var currentList string + if reset { + delete(cm.Data, status) } else { - currentList = currentList + "," + user + currentList = cm.Data[status] + if currentList == "" { + currentList = currentList + user + } else { + currentList = currentList + "," + user + } + cm.Data[status] = currentList } - cm.Data[status] = currentList + cm.Data[migrationStatusLastUpdate] = metav1.Now().Format(time.RFC3339) if _, err := sc.Core.ConfigMaps(activedirectory.StatusConfigMapNamespace).Update(cm); err != nil { if err != nil { logrus.Errorf("[%v] unable to update migration status configmap: %v", migrateAdUserOperation, err) } } + err = updateADConfigMigrationStatus(cm.Data, sc) + if err 
!= nil { + logrus.Errorf("unable to update AuthConfig status: %v", err) + } } diff --git a/pkg/auth/providers/activedirectory/activedirectory_provider.go b/pkg/auth/providers/activedirectory/activedirectory_provider.go index ed1dbfb496c..661314a4af5 100644 --- a/pkg/auth/providers/activedirectory/activedirectory_provider.go +++ b/pkg/auth/providers/activedirectory/activedirectory_provider.go @@ -27,17 +27,19 @@ import ( ) const ( - Name = "activedirectory" - UserScope = Name + "_user" - GroupScope = Name + "_group" - ObjectClass = "objectClass" - MemberOfAttribute = "memberOf" - StatusConfigMapName = "ad-guid-migration" - StatusConfigMapNamespace = "cattle-system" - StatusMigrationField = "ad-guid-migration-status" - StatusMigrationFinished = "Finished" - StatusMigrationRunning = "Running" - StatusLoginDisabled = "login is disabled while migration is running" + Name = "activedirectory" + UserScope = Name + "_user" + GroupScope = Name + "_group" + ObjectClass = "objectClass" + MemberOfAttribute = "memberOf" + StatusConfigMapName = "ad-guid-migration" + StatusConfigMapNamespace = "cattle-system" + StatusMigrationField = "ad-guid-migration-status" + StatusMigrationFinished = "Finished" + StatusMigrationRunning = "Running" + StatusMigrationFinishedWithSkipped = "FinishedWithSkipped" + StatusMigrationFinishedWithMissing = "FinishedWithMissing" + StatusLoginDisabled = "login is disabled while migration is running" ) var scopes = []string{UserScope, GroupScope} diff --git a/pkg/auth/providers/common/provider_util.go b/pkg/auth/providers/common/provider_util.go new file mode 100644 index 00000000000..b7573d1ae94 --- /dev/null +++ b/pkg/auth/providers/common/provider_util.go @@ -0,0 +1,46 @@ +package common + +import ( + "fmt" + "reflect" + "time" + + "github.com/mitchellh/mapstructure" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Decode will decode to the output structure by creating a custom decoder +// that uses the stringToK8sTimeHookFunc to handle the 
metav1.Time field properly. +func Decode(input, output any) error { + decoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ + DecodeHook: stringToK8sTimeHookFunc(), + Result: output, + }) + if err != nil { + return fmt.Errorf("unable to create decoder for Config: %w", err) + } + err = decoder.Decode(input) + if err != nil { + return fmt.Errorf("unable to decode Config: %w", err) + } + return nil +} + +// stringToK8sTimeHookFunc returns a DecodeHookFunc that converts strings to metav1.Time. +func stringToK8sTimeHookFunc() mapstructure.DecodeHookFunc { + return func( + f reflect.Type, + t reflect.Type, + data interface{}) (interface{}, error) { + if f.Kind() != reflect.String { + return data, nil + } + if t != reflect.TypeOf(metav1.Time{}) { + return data, nil + } + + // Convert it by parsing + stdTime, err := time.Parse(time.RFC3339, data.(string)) + return metav1.Time{Time: stdTime}, err + } +}