From 6c084dfdf51cf99a1a2ee998e826933a4aa504f2 Mon Sep 17 00:00:00 2001 From: Nicholas Flynt Date: Thu, 17 Aug 2023 09:01:45 -0400 Subject: [PATCH] Squashed commit of the following: commit 3db22eb13d5b70335c7543921e062e3feaf343a3 Merge: 80392070c 552fb842b Author: Nicholas Flynt Date: Thu Aug 17 08:57:01 2023 -0400 Merge branch 'uuid-unmigration' of github.com:nflynt/rancher into uuid-unmigration commit 80392070cea1a91f808c12bb5a5c16c358945eca Author: Nicholas Flynt Date: Thu Aug 17 08:56:53 2023 -0400 tiny, tiny fix to logging commit 552fb842b326d40890a104ee67ebcf2a2fcbd711 Merge: ea685171c 99a1814c4 Author: nflynt Date: Thu Aug 17 07:39:00 2023 -0400 Merge pull request #30 from crobby/migrationreview31 Outdent else blocks to make lint happy commit 99a1814c493b69c87cb73edaca6da968d892d2a8 Author: Chad Roberts Date: Thu Aug 17 05:00:47 2023 -0400 Outdent else blocks to make lint happy commit ea685171c76b1f5e84291124856756ec6d3ed574 Author: Nicholas Flynt Date: Wed Aug 16 20:28:14 2023 -0400 Apply exponential retry logic to GRB and Token migrations Also, like *RTBs, these are considered non-fatal if a permanent error of some sort occurs. We continue to migrate the user anyway. commit 4a2ae0b0a95180da4240a7e7002e4face8750b51 Author: Nicholas Flynt Date: Wed Aug 16 19:24:42 2023 -0400 For CRTB/PRTBs, rework error handling to gracefully retry In particular, this treats internal errors (usually related to webhook timeouts) as transient, and retries them with a little bit of exponential backoff. Furthermore, after reviewing some scenarios with Michael, we've decided to consider non-internal errors from the webhook as non-fatal in terms of continuing to process the individual user. There are a few situations where old bindings to disabled templates would otherwise block users from migrating, and this permits those to have a better chance of overall success. commit 35d647c5f1c17c3f1c3a12b428d8c09a904a9f98 Author: Nicholas Flynt Date: Wed Aug 16 16:58:50 2023 -0400 When merging user tokens, copy over all relevant principal fields These aren't used for anything that I'm aware of, so this is really more just for consistency, since we want the two to be fully paired. commit f3e80946a67bc7f4d96c20b5e93e5411b0ef39d7 Author: Nicholas Flynt Date: Wed Aug 16 16:52:15 2023 -0400 Cleanup error handling, consider AD retrieval to be a harder error commit 90f2ec152ec81fcdf9c8321697d01b4e2e23b970 Merge: ffcec58fe b56138bc4 Author: Nicholas Flynt Date: Wed Aug 16 16:13:28 2023 -0400 Merge branch 'uuid-unmigration' of github.com:nflynt/rancher into uuid-unmigration commit ffcec58feacbebdae4ea2f86c0c6e6bd33ee8163 Author: Nicholas Flynt Date: Wed Aug 16 16:13:10 2023 -0400 ... once. Add the DN-based principal once. 
commit b56138bc44da4769369965696ef79cc1a05ebfbb Merge: 78a66e023 bfb71760e Author: nflynt Date: Wed Aug 16 15:47:45 2023 -0400 Merge pull request #29 from crobby/migrationreview25 Store skipped/missing user count in configmap and do not store the actual list on the authconfig object commit 78a66e023d137c9fa4498ac7e650bb96dcce7e5e Merge: edf35359f df507b531 Author: nflynt Date: Wed Aug 16 15:47:24 2023 -0400 Merge pull request #28 from crobby/migrationreview24 Remove unnecessary json marshal/unmarshal commit edf35359fedb2e41fef609b940a5b60bb6d36265 Merge: b93e6d00c 12020af89 Author: nflynt Date: Wed Aug 16 15:47:10 2023 -0400 Merge pull request #27 from crobby/migrationreview23 Give the job pod a chance to come up before tailing the log commit b93e6d00c3e7c5af78e250f1ae1ba04fecbcb105 Merge: a2c2acb9d 58a0a1d3e Author: nflynt Date: Wed Aug 16 15:46:52 2023 -0400 Merge pull request #26 from crobby/migrationreview22 Now using AuthConfig annotation as source of truth to block login during migration commit a2c2acb9defb212a5fd848ec43c19febb085a261 Author: Nicholas Flynt Date: Wed Aug 16 15:46:06 2023 -0400 Rework allowed user migration to handle duplicates and missing users commit bfb71760e48bc07dcd52d6984ca1cc0443e023b1 Author: Chad Roberts Date: Wed Aug 16 14:38:22 2023 -0400 Store skipped/missing user count in configmap and do not store the actual list on the authconfig object commit df507b53180582233e492344415cc6162d22c17e Author: Chad Roberts Date: Wed Aug 16 13:38:39 2023 -0400 Remove unnecessary json marshal/unmarshal commit 12020af89545e0701f118f318fa088b8982ceca7 Author: Chad Roberts Date: Wed Aug 16 13:01:18 2023 -0400 Give the job pod a chance to come up before tailing the log commit 58a0a1d3ebe52bc7d17fa68027c70803aba91cd3 Author: Chad Roberts Date: Wed Aug 16 12:50:57 2023 -0400 Now using AuthConfig annotation as source of truth to block login during migration commit 3ef3fb08eaa3f52184a8e204f89e20a4cce8d886 Author: Nicholas Flynt Date: Wed Aug 16 12:27:23 2023 -0400 Wait to do the AuthConfig principals until after updating users This kicks off some rancher-side tasks based on the updated list, and we'd really like to make sure that those user changes have been made in advance just for sanity purposes. commit b29bfb836887f03a68d0a346fe7c646f5ac057b1 Author: Nicholas Flynt Date: Wed Aug 16 12:25:30 2023 -0400 When collecting duplicates, we need to track the workunit index commit df0307e26f6e0a0c97788c743217619fb34574a7 Author: Nicholas Flynt Date: Wed Aug 16 09:23:47 2023 -0400 Have the dry run guard writing new principal IDs This is mostly just to make the code clearer and more obvious. The safety is redundant, as the dry run also blocks making changes to the user object later. 
commit 59bafdf71ea68c8d9c6b2e0bacb39afed46570ea Merge: 2dd525070 2473062c2 Author: nflynt Date: Wed Aug 16 09:12:08 2023 -0400 Merge pull request #25 from crobby/migrationreview21 Append copy of user rather than pointer to duplicate list commit 2473062c25d2faa29f06f4e1e95151537dddc631 Author: Chad Roberts Date: Wed Aug 16 08:00:41 2023 -0400 append copy of user rather than pointer to duplicate list commit 2dd525070ae2ad81b1c5fe53d87f92f464f7c5ca Author: Nicholas Flynt Date: Tue Aug 15 16:48:34 2023 -0400 Explicitly check to see if AD is disabled, and exit success in this case commit 4a3aa8031c1b32ee81fe0096d0a3125edbbfb2bc Author: Nicholas Flynt Date: Tue Aug 15 16:00:25 2023 -0400 Actually *use* the final migration status commit 255ef6856d026ce0499656a9f5838602ee4227d9 Author: Nicholas Flynt Date: Tue Aug 15 15:36:19 2023 -0400 Add uuid-unmigration script, prevent AD logins during execution Squashed commit of the following: commit c2bb101b0b5ff0c62ad83033dc6a2d23b5fbc1df Author: Nicholas Flynt Date: Tue Aug 15 15:13:12 2023 -0400 Add a generic failure status, defer restoring logins on failure states commit f9c039835df885c3268ee3fbe2f5e11213a3d690 Author: Nicholas Flynt Date: Tue Aug 15 13:21:29 2023 -0400 Permit retries (with backoff) when opening the LDAP connection Previously we were considering a failure during open (initial or otherwise) to be a hard, script-ending, permanent failure. That's frankly a bit silly, networks can be tempermental, so this fixes that somewhat. Notably, I can't seem to find any way to check the status of the connection on the lConn object, so we're tracking that manually using a tiny little state object. If there's a cleaner way to inspect this state I am all ears, but I don't think it's a majorly big deal. (Elsewhere in Rancher we don't try to share the ldap connection generally, but here it is a big performance boost, so it is worth the extra trouble.) commit b293d6216fc6d05fbdc0becb802519c488178f36 Author: Nicholas Flynt Date: Tue Aug 15 12:54:43 2023 -0400 Rework token logic to mirror *RTBs This both collects and processes tokens that the old logic would have missed, and is also considerably more efficient, now needing to scan the list of workunits and the list of tokens just once. commit fcd2b34c0a8659a14e80578046d3d7f971249489 Merge: 005f10225 3bdea128a Author: nflynt Date: Tue Aug 15 12:12:36 2023 -0400 Merge pull request #24 from crobby/migrationreview17 Fixing names to make ci happy commit 3bdea128ad265845b7e657c8905c2011aa4e805e Author: Chad Roberts Date: Tue Aug 15 12:09:22 2023 -0400 Fixing names to make ci happy commit 005f1022591610be06dd87bae09c24ea4981a801 Author: Nicholas Flynt Date: Tue Aug 15 12:01:31 2023 -0400 Missing users are Infof, not Errorf commit 540e49406103681e311250f06f1b161db099a4b1 Author: Nicholas Flynt Date: Tue Aug 15 11:10:27 2023 -0400 Don't create/update the configmap object in dry run mode What part of "dry run" did we forget, hrm? commit 9ced565d36cbffb2745e82c3264a74f76554a131 Author: Nicholas Flynt Date: Tue Aug 15 11:00:51 2023 -0400 If the config map is not found, it's fine. (Panic otherwise.) 
commit 80ea8488208594cd9b1e5089c65d18cc71588f3b Author: Nicholas Flynt Date: Tue Aug 15 10:53:30 2023 -0400 Add logic to migrate list of allowed users commit c12dcef87e974546ea86203de12e34874985b7ee Merge: 33f494aa2 ce1feb40a Author: nflynt Date: Tue Aug 15 09:25:53 2023 -0400 Merge pull request #23 from crobby/migrationreview14 Another round of updates commit 33f494aa26acd918d6f2dc68d79d14f4abd7cbfc Merge: b897e47d6 e944b5724 Author: Nicholas Flynt Date: Tue Aug 15 09:13:15 2023 -0400 Merge branch 'uuid-unmigration' of github.com:nflynt/rancher into uuid-unmigration commit b897e47d6ee5d2197f5a9e0635bdc5c14ca7de6e Author: Nicholas Flynt Date: Tue Aug 15 09:12:51 2023 -0400 Rework CRTB,PRTB collection, add GRB migration logic commit ce1feb40ae67a3776baf5c464bcfcfe7a1c50e82 Author: Chad Roberts Date: Tue Aug 15 07:15:24 2023 -0400 Echoing the set options at the end of the banner commit 089412c12a63ffecba0460d39b49b45288d364e1 Author: Chad Roberts Date: Tue Aug 15 06:44:43 2023 -0400 Adding additional information to README commit a7c94846ff543aeb210de2cf75fca30f580dd9ef Author: Chad Roberts Date: Tue Aug 15 06:38:19 2023 -0400 Include agent image location in banner commit 8854263d35fa0a09360b4a34fec1fbd6791e81b2 Author: Chad Roberts Date: Mon Aug 14 16:31:44 2023 -0400 Mirror script status to authconfig commit 5bc29d50d1d7297fd7f2b6fb0979ba602f32865d Author: Chad Roberts Date: Mon Aug 14 12:50:13 2023 -0400 Update script status codes commit e944b5724739d750c13b4b3e7dadba24dabf5045 Merge: 14c5f7254 80e928b78 Author: nflynt Date: Mon Aug 14 11:36:58 2023 -0400 Merge pull request #22 from crobby/migrationreview13 More updates commit 14c5f7254ad9e2ce553507de2d5f2a560a0e53c4 Merge: a3e85deae 516bdeb98 Author: Nicholas Flynt Date: Mon Aug 14 11:36:03 2023 -0400 Merge branch 'uuid-unmigration' of github.com:nflynt/rancher into uuid-unmigration commit a3e85deae0b0ce26c1a447f9c3b0cdc4dc5b7401 Author: Nicholas Flynt Date: Mon Aug 14 11:35:46 2023 -0400 Break out migration logic into a bunch of smaller files commit 80e928b7823e9de4b3094e25b71c303c53b4d9f6 Author: Chad Roberts Date: Mon Aug 14 10:51:39 2023 -0400 Use configmap cache instead of client commit 516bdeb9875c537d76e99feef1cd2105b6d8eb0f Merge: a89977922 f8369c8f5 Author: nflynt Date: Mon Aug 14 10:13:56 2023 -0400 Merge pull request #21 from crobby/migrationreview12 Display banner before doing version check commit f8369c8f5aa85e4ca37359ff2f0d54e26ce07301 Author: Chad Roberts Date: Mon Aug 14 10:12:31 2023 -0400 Display banner before doing version check commit a89977922b7c1f957ed0fc932a8f90d7d525ea70 Author: nflynt Date: Mon Aug 14 10:08:24 2023 -0400 Update cleanup/ad-guid-README.md Co-authored-by: Michael Bolot commit 4d09212e9e993ec8a683af332cd5867f4ebe6377 Merge: c110ae981 92483fa68 Author: nflynt Date: Mon Aug 14 09:58:56 2023 -0400 Merge pull request #19 from crobby/migrationreview9 Removing unused error type check commit 92483fa68cad8c3d4972c088093511a5f52bd46a Author: Chad Roberts Date: Mon Aug 14 09:51:18 2023 -0400 Removing unused error type check commit c110ae9813b27b6411e806f375b177c9040025ab Author: Nicholas Flynt Date: Thu Aug 10 19:51:16 2023 -0400 goimports the things commit 769114669b52aa8009ccfbc0afda3bfade78a41c Merge: 44d2375b4 645348486 Author: Nicholas Flynt Date: Thu Aug 10 19:19:39 2023 -0400 Merge branch 'uuid-unmigration' of github.com:nflynt/rancher into uuid-unmigration commit 64534848693db9a923c29f16d27f0e9772902b3f Merge: baf84bf12 50286a2c8 Author: nflynt Date: Thu Aug 10 19:19:32 2023 -0400 Merge pull request #18 
from crobby/migrationreview7 Fixing error checking commit 44d2375b49267dac90300b7c8998195e8b778866 Author: Nicholas Flynt Date: Thu Aug 10 19:13:58 2023 -0400 Use wait's exponential backoff primitive instead of manual sleeps commit 50286a2c8eafec2d0e9efa32e4a5e782c2644b39 Author: Chad Roberts Date: Thu Aug 10 16:27:48 2023 -0400 Fixing error checking commit baf84bf12ecda8e7ac03dfc8a6667f77a8941afe Author: Nicholas Flynt Date: Thu Aug 10 15:39:13 2023 -0400 Only yell if the user is doing a non-dry-run on v2.7.5 commit eed1416a4091badfa785c05128fd4170644671ea Merge: 9a71e3870 ad00983a0 Author: Nicholas Flynt Date: Thu Aug 10 15:36:53 2023 -0400 Merge branch 'uuid-unmigration' of github.com:nflynt/rancher into uuid-unmigration commit 9a71e38706a925422793e951f23be75da2cdaba5 Author: Nicholas Flynt Date: Thu Aug 10 15:36:08 2023 -0400 Cleanup timeout messaging, lower job start timeout to 5 minutes I misunderstood the bash logic when I first extended that to one hour. 5 minutes for an agent download is somewhat more sensible. commit ad00983a0234e98553875227fd912ae061023543 Merge: 4e18baa91 344a05d59 Author: nflynt Date: Thu Aug 10 15:34:29 2023 -0400 Merge pull request #17 from crobby/migrationreview6 Additional changes after review commit 344a05d59413d45e50bf7822c4411d15507dab73 Author: Chad Roberts Date: Thu Aug 10 14:16:55 2023 -0400 Adding version check for v2.7.5 before doing anything commit 682444d9633ed666d7d25c12aca766e6096a5866 Author: Chad Roberts Date: Thu Aug 10 13:50:05 2023 -0400 Fix-up README for updated usage commit 4e18baa912981ed26d3d01d5ac772c6134e680fc Author: Nicholas Flynt Date: Thu Aug 10 14:54:15 2023 -0400 Spawn relevant resources in the cattle-system namespace commit f96eb3acf845a63172a7fc89bdab6ccc8dedd79f Author: Nicholas Flynt Date: Thu Aug 10 14:12:33 2023 -0400 Move the YAML configuration file into the bash script This dodges the whole "fetch it from a weird URL" thing, and also makes the script a self-contained single file, which is much nicer for support to deal with. 
commit 275f42b42b4771a1041331f3b34516acde785303 Merge: 4c9876465 b99cab403 Author: nflynt Date: Thu Aug 10 11:16:41 2023 -0400 Merge pull request #16 from crobby/migrationreview5 More post review updates commit b99cab403b41bbece5ab18165e113faa6e998853 Author: Chad Roberts Date: Thu Aug 10 09:53:57 2023 -0400 Fixing up handling of command line options and args commit 4f6da400deac8d56e4630aa42b25e0b21040266c Author: Chad Roberts Date: Thu Aug 10 07:49:20 2023 -0400 Fixing up LdapFoundDuplicateGUID name commit 9f577f6ccfd10d17cca0b0e3f2a5ce20863ffb69 Author: Chad Roberts Date: Thu Aug 10 07:31:20 2023 -0400 Adding percentage done indicator to status config map commit 43f19e40cdedcdf7b550c2ea1f6df47b9ab50334 Author: Chad Roberts Date: Thu Aug 10 07:06:02 2023 -0400 Adding lists of special status users to configmap commit fa9979e1c81469c998d47f60433b4aac9dcd869b Author: Chad Roberts Date: Thu Aug 10 06:33:46 2023 -0400 Adding rancher-cleanup label to all cleanup objects commit 4c9876465443048c80e161c31f4d2aef6485978f Merge: 2d59ac671 c30130365 Author: nflynt Date: Wed Aug 9 17:38:29 2023 -0400 Merge pull request #15 from crobby/migrationreview4 Post review updates commit c301303651a4c5d4e291ce864ecdf183ba7dd0da Author: Chad Roberts Date: Wed Aug 9 17:33:39 2023 -0400 Updated isGUID function commit 2d59ac6715ce9e94f8bb3c8da8e9286990cab0ce Merge: c0cdc07e9 86330c6e9 Author: nflynt Date: Wed Aug 9 17:14:48 2023 -0400 Merge pull request #14 from crobby/migrationreview3 Migration review updates 3 commit c0cdc07e95857796f4248c0e03ad484cb061d42e Author: Nicholas Flynt Date: Wed Aug 9 17:12:22 2023 -0400 Log if we need to skip a CRTB/PRTB due to the user not existing This feels like the safer option versus applying permissions that none of the users we've collected actually have, even with the GUID/DN matching. This situation should be relatively uncommon, as Rancher usually cleans these up when users are deleted, but with the GUID duplicate bug I'm not sure how successful that will have been in practice. Best to be safe (and noisy) commit 86330c6e96d90aae20211736dac9fb5040e9c40e Author: Chad Roberts Date: Wed Aug 9 17:09:05 2023 -0400 Updating SA permissions for nonResourceURLs commit 4ae2d58c6326299ac04ef1d45c5fe20b813ba09d Author: Chad Roberts Date: Wed Aug 9 12:12:19 2023 -0400 Seeding README, adding script banner commit f8c941bc91ef7d2ba86258b4edb596ddea29da69 Author: Chad Roberts Date: Wed Aug 9 11:20:10 2023 -0400 Token collection checking userID and now setting userID and label for token updates commit e742102bb4bf47a17c2da30a811eba4da03453b6 Author: Chad Roberts Date: Wed Aug 9 11:03:04 2023 -0400 Adding additional dry-run logging information commit dc461146039b89e0e42e7f816cf17398ba24418d Author: Nicholas Flynt Date: Wed Aug 9 16:57:02 2023 -0400 Rework CRTB/PRTB collection to check usernames, run through list once There are still nested for loops in here, but they are a bit more hidden :P commit ad32ccde3310df0f6ed3978ee197829813c8246e Merge: ccb0b846d cb98c12fa Author: Nicholas Flynt Date: Wed Aug 9 12:52:25 2023 -0400 Merge branch 'uuid-unmigration' of github.com:nflynt/rancher into uuid-unmigration commit ccb0b846d282c52bf10fc47a194b2ba330e3d548 Author: Nicholas Flynt Date: Wed Aug 9 12:50:27 2023 -0400 Break out the user modification flow into separate functions This mostly cleans up the main loop, but it also separates concerns and makes the smaller bits of logic easier to find and follow. 
commit aa418938d78fc118465e0fd7716712a6cfc530a4 Author: Nicholas Flynt Date: Wed Aug 9 12:19:08 2023 -0400 Move user principal printing into its respective utility function commit ef909ab7c2b20ba2ae271bde36e4f5d808e089aa Author: Nicholas Flynt Date: Wed Aug 9 12:12:05 2023 -0400 Respect the adConfig's UserObjectClass when performing a GUID lookup This is for parity with the auth provider; most AD configurations shouldn't have changed this from the default. commit 396320570bb7e788b75ec4f62af28e2c6f79ee77 Author: Nicholas Flynt Date: Wed Aug 9 11:44:10 2023 -0400 Consider multiple users with the same GUID as a hard error This shouldn't be possible in practice, so it almost certainly indicates either a configuration error, or something wrong on the AD side of things. Either way we will refuse to process any user that trips this logic, and complain about it quite loudly. commit 0cebb89e2fae4c8e1937778fea036e983aef27b3 Author: Nicholas Flynt Date: Wed Aug 9 11:27:24 2023 -0400 We don't need the scope, so simplify -> getExternalId commit da7ef2280e6480e34cbb22d77bd5f3c3d89398dc Author: Nicholas Flynt Date: Wed Aug 9 11:11:41 2023 -0400 Start the scaledContext. Don't give it managers it doesn't need commit a60b14480f11a81f19277cd77b4ac8597ddf818e Author: Nicholas Flynt Date: Wed Aug 9 10:34:25 2023 -0400 Remove the ratelimiting exception. Prefer safety over speed We need to check the performance ramifications of this during testing, but considering that we will almost certainly be iterating over hundreds of users, we should probably let k8s itself rate limit us so we don't overwhelm whatever is running the control plane. That might otherwise be a nasty situation, especially for stuff like AKS and GKE. commit 16715df42475c07bef66e3982dee5f9905b08ec3 Author: Nicholas Flynt Date: Wed Aug 9 10:32:57 2023 -0400 For bonus safety, redundantly check for dryRun here The logic up top should make this check unnecessary, but we want to be extra certain that in dryRun mode no changes are made, so we'll explicitly guard on it every time. This protects the code less from itself, and more from future modifications. 
commit cb98c12fac2205803284dbd975e785d283f002fa Merge: e17d56fe3 4d2f73520 Author: nflynt Date: Wed Aug 9 10:20:06 2023 -0400 Merge pull request #13 from crobby/migrationreview2 More updates based on review comments commit 4d2f7352085d3b03739245d5a0e3c32c6c2a85aa Author: Chad Roberts Date: Tue Aug 8 10:17:38 2023 -0400 More updates based on review comments commit e17d56fe37605bdeb0fa6afb4f20e740d4658f0c Author: Nicholas Flynt Date: Mon Aug 7 16:38:59 2023 -0400 EscapeUUID -> escapeUUID commit 139ce3c900d8d99c30cfbccd9d71747c81fcc7b8 Author: Nicholas Flynt Date: Mon Aug 7 16:37:34 2023 -0400 Relocate environment variable use to the agent-specific code path commit 795c94b0ec2c6511e06648e8923ed5d776f69f7d Author: Nicholas Flynt Date: Mon Aug 7 16:33:13 2023 -0400 Remove unnecessary namespace from cluster role definitions commit 01ea868d7dc62c814419cad4fc2394d7bf1fdc5b Author: Nicholas Flynt Date: Mon Aug 7 16:30:53 2023 -0400 One minute is *awfully optimistic.* Let's be more realistic commit b9d4487fe77eb1537eeb5420eba3e1dd1bc25c3f Merge: 17250dab5 0efbb02fd Author: nflynt Date: Mon Aug 7 16:21:42 2023 -0400 Merge pull request #12 from crobby/migrationreview Update based on review comments commit 0efbb02fd7a22c00b9a21553018bc4020608cd40 Author: Chad Roberts Date: Mon Aug 7 15:55:46 2023 -0400 Update based on review comments commit 17250dab59307dd085d927288f52dc0e0996ab24 Author: Nicholas Flynt Date: Mon Aug 7 10:29:05 2023 -0400 Don't hide the migration script from windows agents ... which in hindsight are probably somewhat likely to be using the Active Directory auth provider. commit cadf021ca147526dc0da1b048b41231759f49376 Merge: 9b8fd58a3 3926f7bfb Author: nflynt Date: Mon Aug 7 08:18:10 2023 -0400 Merge pull request #11 from crobby/migrateimports Fixing imports commit 3926f7bfba954e143cdbff79eb31e6fe5a687693 Author: Chad Roberts Date: Sat Aug 5 07:45:25 2023 -0400 Fixing imports commit 9b8fd58a344115517336ed44f6a20ae5599d7144 Merge: de38ffed6 26dd50503 Author: nflynt Date: Fri Aug 4 17:10:43 2023 -0400 Merge pull request #10 from crobby/dntokens Fix tokens going to local principal commit 26dd50503d661ceb95c56a4772a166d5c1f9be96 Author: Chad Roberts Date: Fri Aug 4 17:08:20 2023 -0400 Fix tokens going to local principal commit de38ffed69832f9b46472369bdbe729ab4561758 Author: Nicholas Flynt Date: Fri Aug 4 15:36:12 2023 -0400 Cleanup debug/info logs somewhat commit 1581b5d82163ddd3737673b95339f63e4eee048e Merge: 5dfcda078 29c87eb70 Author: nflynt Date: Fri Aug 4 14:56:22 2023 -0400 Merge pull request #9 from crobby/linter2 More cleaning up lint commit 29c87eb706ea758d88d6a3d76507d4453e8170be Author: Chad Roberts Date: Fri Aug 4 14:54:40 2023 -0400 More cleaning up lint commit 5dfcda078903ec53137053c93d0544b85c1ced30 Merge: a1196635c d37ef2fc8 Author: nflynt Date: Fri Aug 4 14:49:55 2023 -0400 Merge pull request #8 from crobby/linter Cleaning up lint commit d37ef2fc8ffc5a3d1366b60f8f5980117eb2999c Author: Chad Roberts Date: Fri Aug 4 14:47:44 2023 -0400 Cleaning up lint commit a1196635cbc212163cd09c7d932d9ebc4dda34b3 Author: Nicholas Flynt Date: Fri Aug 4 14:38:46 2023 -0400 Add an option to automatically delete missing-guid users This is only available when running the standalone script. At Rancher startup this option is set to false, so missing users will be logged instead and require manual intervention. 
commit 60f31f8a40b209c9e47322a6f858782e9f04924c Merge: 7e620d5b3 9d8257882 Author: nflynt Date: Fri Aug 4 13:22:56 2023 -0400 Merge pull request #7 from crobby/0805-migration Update migration start logic so an automated run will only happen if another run has not completed commit 9d825788206591e55a5b162d480897d95e1ec0c5 Author: Chad Roberts Date: Fri Aug 4 12:12:56 2023 -0400 Update migration start logic so an automated run will only happen if another run has not completed commit 7e620d5b36e3ab28ae5e136ab20dd489e3d959c0 Merge: 30c9f640b 6c352a588 Author: nflynt Date: Fri Aug 4 11:26:52 2023 -0400 Merge pull request #4 from crobby/migrateatstart Add guid migration to rancher startup commit 30c9f640bcb75f50e6205303d7e48e6e039dc148 Merge: b9aa3920f 72895b416 Author: nflynt Date: Fri Aug 4 11:10:58 2023 -0400 Merge pull request #5 from crobby/0803-migration Make sure annotations/labels are not nil commit 72895b4167ee27c180b80430ffa0e0b7b215fd62 Author: Chad Roberts Date: Thu Aug 3 16:58:56 2023 -0400 Make sure annotations/labels are not nil commit b9aa3920fcfe48fcddd69354ab9db91cd0ca4bb2 Merge: 79762cb21 7546cdf42 Author: nflynt Date: Fri Aug 4 10:43:30 2023 -0400 Merge pull request #6 from crobby/0804-migration Fix crtb, prtb collection and add token collection/migration commit 7546cdf428f85e7e6d138d5ec2fd6ecd7cc8f900 Author: Chad Roberts Date: Fri Aug 4 08:59:54 2023 -0400 Fix crtb, prtb collection and add token collection/migration commit 79762cb21b7f313b4839ccf2a9563a756b482523 Author: Nicholas Flynt Date: Thu Aug 3 18:00:53 2023 -0400 Collect CRTBs and PRTBs in a single pass commit b6b6085cb8caf372002fa9665768a46715022cc2 Merge: 3de5aa34b b3acab974 Author: nflynt Date: Thu Aug 3 11:44:13 2023 -0400 Merge pull request #3 from crobby/0802-2migration Adding annotation/labels for migrated objects also blocking login while migration is active commit b3acab9740b3a4a10285548689d1584044bc5ad1 Author: Chad Roberts Date: Thu Aug 3 11:37:16 2023 -0400 Update role for SA commit 673e765a181a8620554a2fb2236bf7f1233b35ad Author: Chad Roberts Date: Thu Aug 3 09:33:45 2023 -0400 Blocking login while migration is running commit 6c352a588f90364983b3fa27a73fe947c92b156a Author: Chad Roberts Date: Wed Aug 2 13:42:33 2023 -0400 Add guid migration to rancher startup commit 840c5a7a5d9022c69a03f383b4763d424baac216 Author: Chad Roberts Date: Wed Aug 2 12:20:41 2023 -0400 Adding annotation/labels for migrated objects commit 3de5aa34bcfad440ae50b0fd272325cd62d65f45 Merge: 5dc7bd729 04ea1ce7d Author: nflynt Date: Wed Aug 2 09:57:48 2023 -0400 Merge pull request #2 from crobby/0802migration Fix status function and use user copies in workUnit slices commit 04ea1ce7d83f32abda962a10ccbcc80b64cb4ada Author: Chad Roberts Date: Tue Aug 1 18:02:19 2023 -0400 Fixing status function and using copies of users in workUnit slices commit 5dc7bd7292621492deb96f417bb2b106c23ae09e Author: Nicholas Flynt Date: Tue Aug 1 16:29:15 2023 -0400 Skip over configmap updates for now, just to get the script running commit ac3afe600a1f59b7d8648c3efe0d9a4c1b9fa746 Author: Nicholas Flynt Date: Tue Aug 1 16:19:52 2023 -0400 Massively overhaul main loop, check for and handle duplicate users This is largely untested because I'm having some trouble with the configmaps code, but I wanted to get this committed before I start troubleshooting commit 5295f8f4d16e87390cc6e8f2bbd2db3558a58447 Merge: 29f93328f 552e73f89 Author: nflynt Date: Tue Aug 1 08:58:41 2023 -0400 Merge pull request #1 from crobby/tokenunmigrate Additional unmigration functionality 
commit 552e73f89e9ff1c71f965ea1129e2c2a59fb85cf Author: Chad Roberts Date: Mon Jul 31 13:22:26 2023 -0400 Additional unmigration functionality commit 29f93328f1ee2e8edbcda3f2538dda43b5c9e07c Author: Nicholas Flynt Date: Mon Jul 31 17:30:10 2023 -0400 Actually perform the GUID -> DN migration on the happy path And it works too! Thank goodness. Now we mostly need to clean up the logic and handle a few dozen edge cases. commit 62a6747beeaf13b818b7dadfe12d43975647fff1 Author: Nicholas Flynt Date: Mon Jul 31 12:53:43 2023 -0400 Cleanup the logs a bit, flatten the central logic with early exits commit ac20a2cdb4f1ed66f96949bebb8cbc85a4c32377 Author: Nicholas Flynt Date: Mon Jul 31 09:58:54 2023 -0400 Switch to using the scaledContext for everything Since it can do all the lookups we need, it seems silly to setup and use two different interfaces to the same underlying datastore. The UnstructuredClient is the only way we can read AD configuration right now, and we need that info, so let's stick to that method. commit 18b39d38e68791465d31fb507bcf26c78a4e7c7e Author: Nicholas Flynt Date: Fri Jul 28 17:38:27 2023 -0400 First pass at migration scaffolding, enough to do GUID -> DN lookups There is still much work to do, but at the very least we can read the relevant auth configuration details from k8s and use those details to make LDAP queries, and that's nearly all of what we need to perform the migration.
---
 cleanup/ad-guid-README.md                  |  65 +++
 cleanup/ad-guid-unmigration.sh             | 265 ++++++++++
 cmd/agent/main.go                          |  12 +-
 pkg/agent/clean/adunmigration/ldap.go      | 414 +++++++++++++++
 pkg/agent/clean/adunmigration/migrate.go   | 500 ++++++++++++++++++
 pkg/agent/clean/adunmigration/rtbs.go      | 404 ++++++++++++++
 pkg/agent/clean/adunmigration/tokens.go    | 131 +++++
 pkg/agent/clean/adunmigration/users.go     | 121 +++++
 .../activedirectory_provider.go            |  43 +-
 pkg/auth/providers/common/provider_util.go |  46 ++
 pkg/multiclustermanager/app.go             |   8 +-
 11 files changed, 1996 insertions(+), 13 deletions(-)
 create mode 100644 cleanup/ad-guid-README.md
 create mode 100755 cleanup/ad-guid-unmigration.sh
 create mode 100644 pkg/agent/clean/adunmigration/ldap.go
 create mode 100644 pkg/agent/clean/adunmigration/migrate.go
 create mode 100644 pkg/agent/clean/adunmigration/rtbs.go
 create mode 100644 pkg/agent/clean/adunmigration/tokens.go
 create mode 100644 pkg/agent/clean/adunmigration/users.go
 create mode 100644 pkg/auth/providers/common/provider_util.go

diff --git a/cleanup/ad-guid-README.md b/cleanup/ad-guid-README.md
new file mode 100644
index 00000000000..d04ede95601
--- /dev/null
+++ b/cleanup/ad-guid-README.md
@@ -0,0 +1,65 @@
+# Active Directory GUID -> DN reverse migration utility
+
+**It is recommended to take a snapshot of Rancher before running this utility, in case a restore is required.**
+
+
+## Critical Notes
+* This script will delete and recreate CRTBs/PRTBs/GRBs, which may cause issues with tools (like terraform) that maintain external state. The original object names are stored in an annotation on the new objects.
+* It is recommended to use this script on Rancher v2.7.6; running it on v2.7.5 may cause performance issues.
+* This script requires that the Active Directory service account has permission to read all users known to Rancher.
+
+
+## Purpose
+
+This utility reverses the effects of the migration that changed Active Directory principalIDs to be based on
+GUID rather than DN.
+It can be run manually via the Rancher Agent, or it will run automatically inside Rancher at startup
+if no previous run is detected.
+This utility will:
+* Remove any users that were duplicated during the original migration toward GUID-based principalIDs in Rancher 2.7.5
+* Update objects that referenced a GUID-based principalID to reference the correct distinguished name-based principalID
+
+
+## Detailed description
+
+This utility will go through all Rancher users and perform an Active Directory lookup using the configured service account to
+get the user's distinguished name. Next, it will perform lookups inside Rancher for all the user's Tokens,
+ClusterRoleTemplateBindings, ProjectRoleTemplateBindings, and GlobalRoleBindings. If any of those objects, including the user object
+itself, reference a principalID based on the GUID of that user, those objects will be updated to reference
+the distinguished name-based principalID (unless the utility is run with --dry-run, in which case the only results
+are log messages indicating the changes that would be made by a run without that flag).
+
+This utility will also detect and correct the case where a single Active Directory GUID is mapped to multiple Rancher
+users. That condition was likely caused by a race in the original migration to GUIDs and resulted in a second
+Rancher user being created. This caused Rancher logins to fail for the duplicated user. The utility remedies
+that situation by mapping any tokens and bindings to the original user before removing the newer user, which was
+created in error.
+
+
+## Requirements
+
+A Rancher environment that has Active Directory set up as the authentication provider. In any environment where
+Active Directory is not the authentication provider, this utility will take no action and will exit immediately.
+
+
+## Usage via Rancher Agent
+
+```bash
+./ad-guid-unmigration.sh AGENT_IMAGE [--dry-run] [--delete-missing]
+```
+* The Agent image can be found at: docker.io/rancher/rancher-agent:v2.7.6
+* The --dry-run flag will run the migration utility, but no changes to Rancher data will take place. The potential changes will be indicated in the log file.
+* The --delete-missing flag will delete Rancher users that cannot be found by looking them up in Active Directory. If --dry-run is also set, no users will be deleted regardless of this flag.
+
+
+## Additional notes
+* The utility will create a configmap named `ad-guid-migration` in the `cattle-system` namespace. This configmap contains
+  a data entry with a key named "ad-guid-migration-status". While the utility is active, that status will be
+  set to "Running". After the utility has completed, the status will be set to "Finished". If a run is interrupted
+  prior to completion, the configmap will retain the status of "Running" and subsequent attempts to run the script will
+  immediately exit. To allow it to run again, you can either edit the configmap to remove that key or you can
+  delete the configmap entirely.
+
+* When migrating ClusterRoleTemplateBindings, ProjectRoleTemplateBindings, and GlobalRoleBindings it is necessary to perform the action
+  as a delete/create rather than an update. **This may cause issues if you use tooling that relies on the names of the objects**.
+  When a ClusterRoleTemplateBinding or a ProjectRoleTemplateBinding is migrated to a new name, the newly created object
+  will contain a label, "ad-guid-previous-name", whose value is the name of the object that was deleted.
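+
+## Checking migration status (example)
+
+A minimal sketch of how the status described above can be inspected; it assumes kubectl access to the `cattle-system` namespace and uses only the configmap and key names documented in the notes above:
+
+```bash
+# Print the current migration status ("Running" or "Finished")
+kubectl --namespace=cattle-system get configmap ad-guid-migration \
+  -o jsonpath="{.data['ad-guid-migration-status']}"
+
+# If an interrupted run left the status stuck at "Running",
+# deleting the configmap allows the utility to run again
+kubectl --namespace=cattle-system delete configmap ad-guid-migration
+```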
diff --git a/cleanup/ad-guid-unmigration.sh b/cleanup/ad-guid-unmigration.sh
new file mode 100755
index 00000000000..39922e7cd46
--- /dev/null
+++ b/cleanup/ad-guid-unmigration.sh
@@ -0,0 +1,265 @@
+#!/bin/bash
+# set -x
+set -e
+
+# Text to display in the banner
+banner_text="This utility will go through all Rancher users and perform an Active Directory lookup using
+the configured service account to get the user's distinguished name. Next, it will perform lookups inside Rancher
+for all the user's Tokens, ClusterRoleTemplateBindings, and ProjectRoleTemplateBindings. If any of those objects,
+including the user object itself, reference a principalID based on the GUID of that user, those objects will be
+updated to reference the distinguished name-based principalID (unless the utility is run with --dry-run, in which case
+the only results are log messages indicating the changes that would be made by a run without that flag).
+
+This utility will also detect and correct the case where a single Active Directory GUID is mapped to multiple Rancher
+users. That condition was likely caused by a race in the original migration to GUIDs and resulted in a second
+Rancher user being created. This caused Rancher logins to fail for the duplicated user. The utility remedies
+that situation by mapping any tokens and bindings to the original user before removing the newer user, which was
+created in error.
+
+It is also important to note that migration of ClusterRoleTemplateBindings and ProjectRoleTemplateBindings requires
+a delete/create operation rather than an update. This will result in new object names for the migrated bindings.
+A label with the former object name will be included in the migrated bindings.
+
+The Rancher Agent image to be used with this utility can be found at rancher/rancher-agent:v2.7.6
+
+It is recommended that you perform a Rancher backup prior to running this utility."
+
+CLEAR='\033[0m'
+RED='\033[0;31m'
+
+# cluster resources, including the service account used to run the script
+cluster_resources_yaml=$(cat << 'EOF'
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: cattle-cleanup-sa
+  namespace: cattle-system
+  labels:
+    rancher-cleanup: "true"
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: cattle-cleanup-binding
+  labels:
+    rancher-cleanup: "true"
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cattle-cleanup-role
+subjects:
+  - kind: ServiceAccount
+    name: cattle-cleanup-sa
+    namespace: cattle-system
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: cattle-cleanup-job
+  namespace: cattle-system
+  labels:
+    rancher-cleanup: "true"
+spec:
+  backoffLimit: 6
+  completions: 1
+  parallelism: 1
+  selector:
+  template:
+    metadata:
+      creationTimestamp: null
+    spec:
+      containers:
+        - env:
+            - name: AD_GUID_CLEANUP
+              value: "true"
+            #dryrun - name: DRY_RUN
+            #dryrun   value: "true"
+            #deletemissing - name: AD_DELETE_MISSING_GUID_USERS
+            #deletemissing   value: "true"
+          image: agent_image
+          imagePullPolicy: Always
+          command: ["agent"]
+          name: cleanup-agent
+          resources: {}
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+      dnsPolicy: ClusterFirst
+      restartPolicy: OnFailure
+      schedulerName: default-scheduler
+      securityContext: {}
+      serviceAccountName: cattle-cleanup-sa
+      terminationGracePeriodSeconds: 30
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: cattle-cleanup-role
+  labels:
+    rancher-cleanup: "true"
+rules:
+  - apiGroups:
+      - '*'
+    resources:
+      - '*'
+    verbs:
+      - '*'
+  - nonResourceURLs:
+      - '*'
+    verbs:
+      - '*'
+EOF
+)
+
+# Agent image to use in the yaml file
+agent_image="$1"
+
+show_usage() {
+    if [ -n "$1" ]; then
+        echo -e "${RED}👉 $1${CLEAR}\n";
+    fi
+    echo "Usage: $0 AGENT_IMAGE [OPTIONS]"
+    echo ""
+    echo "Options:"
+    echo -e "\t-h, --help             Display this help message"
+    echo -e "\t-n, --dry-run          Display the resources that would be updated without making changes"
+    echo -e "\t-d, --delete-missing   Permanently remove user objects whose GUID cannot be found in Active Directory"
+}
+
+display_banner() {
+    local text="$1"
+    local border_char="="
+    local text_width=$(($(tput cols)))
+    local border=$(printf "%${text_width}s" | tr " " "$border_char")
+
+    echo "$border"
+    printf "%-${text_width}s \n" "$text"
+    echo "$border"
+    echo "Dry run: $dry_run"
+    echo "Delete missing: $delete_missing"
+    echo "Agent image: $agent_image"
+    if [[ "$dry_run" = true ]] && [[ "$delete_missing" = true ]]
+    then
+        echo "Setting the dry-run option to true overrides the delete-missing option. NO CHANGES WILL BE MADE."
+    fi
+    echo "$border"
+}
+
+OPTS=$(getopt -o hnd -l help,dry-run,delete-missing -- "$@")
+if [ $? != 0 ]; then
+    show_usage "Invalid option"
+    exit 1
+fi
+
+eval set -- "$OPTS"
+
+dry_run=false
+delete_missing=false
+
+while true; do
+    case "$1" in
+        -h | --help)
+            show_usage
+            exit 0
+            ;;
+        -n | --dry-run)
+            dry_run=true
+            shift
+            ;;
+        -d | --delete-missing)
+            delete_missing=true
+            shift
+            ;;
+        --)
+            shift
+            break
+            ;;
+        *)
+            show_usage "Invalid option"
+            exit 1
+            ;;
+    esac
+done
+
+shift "$((OPTIND - 1))"
+# Ensure AGENT_IMAGE is provided
+if [ $# -lt 1 ]; then
+    show_usage "AGENT_IMAGE is a required argument"
+    exit 1
+fi
+
+display_banner "${banner_text}"
+
+if [ "$dry_run" != true ]
+then
+    # Check the Rancher version before doing anything.
+    # If it is v2.7.5, make it clear that running this utility against that version is not recommended.
+    rancher_version=$(kubectl get settings server-version --template='{{.value}}')
+    if [ "$rancher_version" = "v2.7.5" ]; then
+        echo -e "${RED}IT IS NOT RECOMMENDED TO RUN THIS UTILITY AGAINST RANCHER VERSION v2.7.5${CLEAR}"
+        echo -e "${RED}IF RANCHER v2.7.5 RESTARTS AFTER RUNNING THIS UTILITY, IT WILL UNDO THE EFFECTS OF THIS UTILITY.${CLEAR}"
+        echo -e "${RED}IF YOU DO WANT TO RUN THIS UTILITY, IT IS RECOMMENDED THAT YOU MAKE A BACKUP PRIOR TO CONTINUING.${CLEAR}"
+        read -p "Do you want to continue? (y/n): " choice
+        if [[ ! $choice =~ ^[Yy]$ ]]; then
+            echo "Exiting..."
+            exit 0
+        fi
+    fi
+fi
+
+
+read -p "Do you want to continue? (y/n): " choice
+if [[ ! $choice =~ ^[Yy]$ ]]; then
+    echo "Exiting..."
+    exit 0
+fi
+
+# Substitute the provided rancher agent image into the yaml
+yaml=$(sed -e 's=agent_image='"$agent_image"'=' <<< $cluster_resources_yaml)
+
+if [ "$dry_run" = true ]
+then
+    # Uncomment the env var for dry-run mode
+    yaml=$(sed -e 's/#dryrun // ' <<< "$yaml")
+elif [ "$delete_missing" = true ]
+then
+    # Instead uncomment the env var for missing user cleanup
+    yaml=$(sed -e 's/#deletemissing // ' <<< "$yaml")
+fi
+
+echo "$yaml" | kubectl apply -f -
+
+# Get the pod ID to tail the logs
+retry_interval=1
+max_retries=10
+retry_count=0
+pod_id=""
+while [ $retry_count -lt $max_retries ]; do
+    pod_id=$(kubectl --namespace=cattle-system get pod -l job-name=cattle-cleanup-job -o jsonpath="{.items[0].metadata.name}")
+    if [ -n "$pod_id" ]; then
+        break
+    else
+        sleep $retry_interval
+        ((retry_count++))
+    fi
+done
+
+# 600 iterations is equal to 5 minutes, because the sleep interval is 0.5 seconds
+job_start_timeout=600
+
+declare -i count=0
+until kubectl --namespace=cattle-system logs $pod_id -f
+do
+    if [ $count -gt $job_start_timeout ]
+    then
+        echo "Timeout reached, check the job by running kubectl --namespace=cattle-system get jobs"
+        echo "To cleanup manually, you can run:"
+        echo "  kubectl --namespace=cattle-system delete serviceaccount,job -l rancher-cleanup=true"
+        echo "  kubectl delete clusterrole,clusterrolebinding -l rancher-cleanup=true"
+        exit 1
+    fi
+    sleep 0.5
+    count+=1
+done
+
+# Cleanup after it completes successfully
+echo "$yaml" | kubectl delete -f -
diff --git a/cmd/agent/main.go b/cmd/agent/main.go
index f8f6dfae5a5..47a293776fd 100644
--- a/cmd/agent/main.go
+++ b/cmd/agent/main.go
@@ -24,16 +24,18 @@ import (
 	"github.com/docker/docker/client"
 	"github.com/hashicorp/go-multierror"
 	"github.com/mattn/go-colorable"
+	"github.com/rancher/remotedialer"
+	"github.com/rancher/wrangler/pkg/signals"
+	"github.com/sirupsen/logrus"
+
+	"github.com/rancher/rancher/pkg/agent/clean"
+	"github.com/rancher/rancher/pkg/agent/clean/adunmigration"
 	"github.com/rancher/rancher/pkg/agent/cluster"
 	"github.com/rancher/rancher/pkg/agent/node"
 	"github.com/rancher/rancher/pkg/agent/rancher"
 	"github.com/rancher/rancher/pkg/features"
 	"github.com/rancher/rancher/pkg/logserver"
 	"github.com/rancher/rancher/pkg/rkenodeconfigclient"
-	"github.com/rancher/remotedialer"
-	"github.com/rancher/wrangler/pkg/signals"
-	"github.com/sirupsen/logrus"
 )
@@ -80,6 +82,10 @@ func main() {
 			bindingErr = multierror.Append(bindingErr, err)
 		}
 		err = bindingErr
+	} else if os.Getenv("AD_GUID_CLEANUP") == "true" {
+		dryrun := os.Getenv("DRY_RUN") == "true"
+		deleteMissingUsers := os.Getenv("AD_DELETE_MISSING_GUID_USERS") == "true"
+		err = adunmigration.UnmigrateAdGUIDUsers(nil, dryrun, deleteMissingUsers)
 	} else {
 		err = run(ctx)
 	}
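The hunk above wires the cleanup job to this code path purely through environment variables. As a rough sketch of what the generated Job amounts to, the same path could in principle be exercised directly; the invocation below is hypothetical and for illustration only (the `--entrypoint`, volume mount, and KUBECONFIG details are assumptions, not part of this patch):

```bash
# Hypothetical: run the dry-run cleanup path outside Kubernetes.
# Mirrors the Job's env block; assumes the image ships an `agent` binary
# and that the mounted kubeconfig grants the same permissions as cattle-cleanup-sa.
docker run --rm \
  --entrypoint agent \
  -e AD_GUID_CLEANUP=true \
  -e DRY_RUN=true \
  -e KUBECONFIG=/root/.kube/config \
  -v "$HOME/.kube:/root/.kube:ro" \
  docker.io/rancher/rancher-agent:v2.7.6
```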
diff --git a/pkg/agent/clean/adunmigration/ldap.go b/pkg/agent/clean/adunmigration/ldap.go
new file mode 100644
index 00000000000..9fd725d9225
--- /dev/null
+++ b/pkg/agent/clean/adunmigration/ldap.go
@@ -0,0 +1,414 @@
+package adunmigration
+
+import (
+	"bytes"
+	"crypto/x509"
+	"fmt"
+	"os"
+	"regexp"
+	"strings"
+	"time"
+
+	ldapv3 "github.com/go-ldap/ldap/v3"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/wait"
+	restclient "k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/clientcmd"
+
+	v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
+	"github.com/rancher/rancher/pkg/auth/providers/common"
+	"github.com/rancher/rancher/pkg/auth/providers/common/ldap"
+	v3client "github.com/rancher/rancher/pkg/client/generated/management/v3"
+	"github.com/rancher/rancher/pkg/types/config"
+)
+
+// Rancher 2.7.5 serialized binary GUIDs from LDAP using this pattern, so this
+// is what we should match. Notably this differs from Active Directory GUID
+// strings, which have dashes and braces as delimiters.
+var validRancherGUIDPattern = regexp.MustCompile("^[0-9a-f]+$")
+
+type LdapErrorNotFound struct{}
+
+// Error provides a string representation of an LdapErrorNotFound
+func (e LdapErrorNotFound) Error() string {
+	return "ldap query returned no results"
+}
+
+// LdapFoundDuplicateGUID indicates either a configuration error or
+// a corruption on the Active Directory side. In theory it should never
+// be possible when talking to a real Active Directory server, but just
+// in case we detect and handle it anyway.
+type LdapFoundDuplicateGUID struct{}
+
+// Error provides a string representation of an LdapFoundDuplicateGUID
+func (e LdapFoundDuplicateGUID) Error() string {
+	return "ldap query returned multiple users for the same GUID"
+}
+
+type LdapConnectionPermanentlyFailed struct{}
+
+// Error provides a string representation of an LdapConnectionPermanentlyFailed
+func (e LdapConnectionPermanentlyFailed) Error() string {
+	return "ldap search failed to connect after exhausting maximum retry attempts"
+}
+
+type sharedLdapConnection struct {
+	lConn  *ldapv3.Conn
+	isOpen bool
+}
+
+func ldapConnection(config *v3.ActiveDirectoryConfig) (*ldapv3.Conn, error) {
+	caPool, err := newCAPool(config.Certificate)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create caPool: %v", err)
+	}
+
+	servers := config.Servers
+	TLS := config.TLS
+	port := config.Port
+	connectionTimeout := config.ConnectionTimeout
+	startTLS := config.StartTLS
+
+	ldapConn, err := ldap.NewLDAPConn(servers, TLS, startTLS, port, connectionTimeout, caPool)
+	if err != nil {
+		return nil, err
+	}
+
+	serviceAccountUsername := ldap.GetUserExternalID(config.ServiceAccountUsername, config.DefaultLoginDomain)
+	err = ldapConn.Bind(serviceAccountUsername, config.ServiceAccountPassword)
+	if err != nil {
+		return nil, err
+	}
+	return ldapConn, nil
+}
+
+// escapeUUID takes a GUID in hex string form and adds a backslash before every
+// pair of hex characters. The returned result is the string that needs to be
+// added to the LDAP filter to properly filter by objectGUID, which is stored
+// as binary data.
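+//
+// For example (derived from the logic below), the GUID hex string
+// "0123abcd" becomes the LDAP filter fragment "\01\23\ab\cd".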
+func escapeUUID(s string) string {
+	var buffer bytes.Buffer
+	var n1 = 1
+	var l1 = len(s) - 1
+	buffer.WriteRune('\\')
+	for i, r := range s {
+		buffer.WriteRune(r)
+		if i%2 == n1 && i != l1 {
+			buffer.WriteRune('\\')
+		}
+	}
+	return buffer.String()
+}
+
+func findDistinguishedName(guid string, lConn *ldapv3.Conn, adConfig *v3.ActiveDirectoryConfig) (string, error) {
+	query := fmt.Sprintf("(&(%v=%v)(%v=%v))", AttributeObjectClass, adConfig.UserObjectClass, AttributeObjectGUID, escapeUUID(guid))
+	search := ldapv3.NewSearchRequest(adConfig.UserSearchBase, ldapv3.ScopeWholeSubtree, ldapv3.NeverDerefAliases,
+		0, 0, false,
+		query, ldap.GetUserSearchAttributes("memberOf", "objectClass", adConfig), nil)
+
+	result, err := lConn.Search(search)
+	if err != nil {
+		return "", err
+	}
+
+	if len(result.Entries) < 1 {
+		return "", LdapErrorNotFound{}
+	} else if len(result.Entries) > 1 {
+		return "", LdapFoundDuplicateGUID{}
+	}
+
+	entry := result.Entries[0]
+
+	return entry.DN, nil
+}
+
+func findDistinguishedNameWithRetries(guid string, sLConn *sharedLdapConnection, adConfig *v3.ActiveDirectoryConfig) (string, error) {
+	// These settings range from 2 seconds for minor blips to around a full minute for repeated failures
+	backoff := wait.Backoff{
+		Duration: 2 * time.Second,
+		Factor:   1.5, // duration multiplied by this for each retry
+		Jitter:   0.1, // random variance, just in case other parts of rancher are using LDAP while we work
+		Steps:    10,  // number of retries before we consider this failure to be permanent
+	}
+
+	distinguishedName := ""
+	err := wait.ExponentialBackoff(backoff, func() (finished bool, err error) {
+		if !sLConn.isOpen {
+			sLConn.lConn, err = ldapConnection(adConfig)
+			if err != nil {
+				logrus.Warnf("[%v] LDAP connection failed: '%v', retrying...", migrateAdUserOperation, err)
+				return false, err
+			}
+			sLConn.isOpen = true
+		}
+
+		distinguishedName, err = findDistinguishedName(guid, sLConn.lConn, adConfig)
+		if err == nil || errors.Is(err, LdapErrorNotFound{}) || errors.Is(err, LdapFoundDuplicateGUID{}) {
+			return true, err
+		}
+
+		// any other error type almost certainly indicates a connection failure.
+		// Close and re-open the connection before retrying
+		logrus.Warnf("[%v] LDAP connection failed: '%v', retrying...", migrateAdUserOperation, err)
+		sLConn.lConn.Close()
+		sLConn.isOpen = false
+
+		return false, err
+	})
+
+	return distinguishedName, err
+}
+
+func adConfiguration(sc *config.ScaledContext) (*v3.ActiveDirectoryConfig, error) {
+	authConfigs := sc.Management.AuthConfigs("")
+	secrets := sc.Core.Secrets("")
+
+	authConfigObj, err := authConfigs.ObjectClient().UnstructuredClient().Get("activedirectory", metav1.GetOptions{})
+	if err != nil {
+		logrus.Errorf("[%v] failed to obtain activedirectory authConfigObj: %v", migrateAdUserOperation, err)
+		return nil, err
+	}
+
+	u, ok := authConfigObj.(runtime.Unstructured)
+	if !ok {
+		logrus.Errorf("[%v] failed to retrieve ActiveDirectoryConfig, cannot read k8s Unstructured data %v", migrateAdUserOperation, err)
+		return nil, err
+	}
+	storedADConfigMap := u.UnstructuredContent()
+
+	storedADConfig := &v3.ActiveDirectoryConfig{}
+	err = common.Decode(storedADConfigMap, storedADConfig)
+	if err != nil {
+		logrus.Errorf("[%v] errors while decoding stored AD config: %v", migrateAdUserOperation, err)
+		return nil, err
+	}
+
+	metadataMap, ok := storedADConfigMap["metadata"].(map[string]interface{})
+	if !ok {
+		logrus.Errorf("[%v] failed to retrieve ActiveDirectoryConfig (second step), cannot read k8s Unstructured data %v", migrateAdUserOperation, err)
+		return nil, err
+	}
+
+	typemeta := &metav1.ObjectMeta{}
+	err = common.Decode(metadataMap, typemeta)
+	if err != nil {
+		logrus.Errorf("[%v] errors while decoding typemeta: %v", migrateAdUserOperation, err)
+		return nil, err
+	}
+
+	storedADConfig.ObjectMeta = *typemeta
+
+	logrus.Debugf("[%v] Should in theory have ActiveDirectory config data? Let's check!", migrateAdUserOperation)
+	logrus.Debugf("[%v] AD Service Account User: %v", migrateAdUserOperation, storedADConfig.ServiceAccountUsername)
+
+	if storedADConfig.ServiceAccountPassword != "" {
+		value, err := common.ReadFromSecret(secrets, storedADConfig.ServiceAccountPassword,
+			strings.ToLower(v3client.ActiveDirectoryConfigFieldServiceAccountPassword))
+		if err != nil {
+			return nil, err
+		}
+		storedADConfig.ServiceAccountPassword = value
+	}
+
+	return storedADConfig, nil
+}
+
+func newCAPool(cert string) (*x509.CertPool, error) {
+	pool, err := x509.SystemCertPool()
+	if err != nil {
+		return nil, err
+	}
+	pool.AppendCertsFromPEM([]byte(cert))
+	return pool, nil
+}
+
+// prepareClientContexts sets up a scaled context with the ability to read users and AD configuration data
+func prepareClientContexts(clientConfig *restclient.Config) (*config.ScaledContext, *v3.ActiveDirectoryConfig, error) {
+	var restConfig *restclient.Config
+	var err error
+	if clientConfig != nil {
+		restConfig = clientConfig
+	} else {
+		restConfig, err = clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG"))
+		if err != nil {
+			logrus.Errorf("[%v] failed to build the cluster config: %v", migrateAdUserOperation, err)
+			return nil, nil, err
+		}
+	}
+
+	sc, err := scaledContext(restConfig)
+	if err != nil {
+		return nil, nil, err
+	}
+	adConfig, err := adConfiguration(sc)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	return sc, adConfig, nil
+}
+
+func isGUID(principalID string) bool {
+	parts := strings.Split(principalID, "://")
+	if len(parts) != 2 {
+		logrus.Errorf("[%v] failed to parse invalid PrincipalID: %v", identifyAdUserOperation, principalID)
+		return false
+	}
+	return validRancherGUIDPattern.MatchString(parts[1])
+}
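+
+// For example, given validRancherGUIDPattern above: isGUID("activedirectory_user://deadbeef01234567")
+// returns true, while isGUID("activedirectory_user://CN=Alice,CN=Users,DC=example,DC=com") returns
+// false, since a DN contains characters outside [0-9a-f]. (Example values are illustrative only.)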
+
+func updateADConfigMigrationStatus(status map[string]string, sc *config.ScaledContext) error {
+	authConfigObj, err := sc.Management.AuthConfigs("").ObjectClient().UnstructuredClient().Get("activedirectory", metav1.GetOptions{})
+	if err != nil {
+		logrus.Errorf("[%v] failed to obtain activedirectory authConfigObj: %v", migrateAdUserOperation, err)
+		return err
+	}
+
+	storedADConfig, ok := authConfigObj.(*unstructured.Unstructured)
+	if !ok {
+		return fmt.Errorf("[%v] expected unstructured authconfig, got %T", migrateAdUserOperation, authConfigObj)
+	}
+
+	// Update annotations with migration status
+	annotations := storedADConfig.GetAnnotations()
+	if annotations == nil {
+		annotations = make(map[string]string)
+	}
+	for annotation, value := range status {
+		// We do not mirror the actual user lists to the AuthConfig
+		if annotation != migrateStatusSkipped && annotation != migrateStatusMissing {
+			annotations[adGUIDMigrationPrefix+annotation] = value
+		}
+	}
+	storedADConfig.SetAnnotations(annotations)
+
+	// Update the AuthConfig object using the unstructured client
+	_, err = sc.Management.AuthConfigs("").ObjectClient().UnstructuredClient().Update(storedADConfig.GetName(), storedADConfig)
+	if err != nil {
+		return fmt.Errorf("failed to update authConfig object: %v", err)
+	}
+
+	return nil
+}
+
+func migrateAllowedUserPrincipals(workunits *[]migrateUserWorkUnit, missingUsers *[]missingUserWorkUnit, sc *config.ScaledContext, dryRun bool, deleteMissingUsers bool) error {
+	// because we might process users in this list that have never logged in, we may need to perform LDAP
+	// lookups on the spot to see what their associated DN should be
+	sharedLConn := sharedLdapConnection{}
+	// this needs its own copy of the ad config, decoded with the ldap credentials fetched, so do that here
+	originalAdConfig, err := adConfiguration(sc)
+	if err != nil {
+		return fmt.Errorf("[%v] failed to obtain activedirectory config: %v", migrateAdUserOperation, err)
+	}
+
+	authConfigObj, err := sc.Management.AuthConfigs("").ObjectClient().UnstructuredClient().Get("activedirectory", metav1.GetOptions{})
+	if err != nil {
+		return fmt.Errorf("[%v] failed to obtain activedirectory authConfigObj: %v", migrateAdUserOperation, err)
+	}
+
+	// The authconfig comes back as unstructured data; assert its type so we can work with the contents
+	storedADConfig, ok := authConfigObj.(*unstructured.Unstructured)
+	if !ok {
+		return fmt.Errorf("[%v] expected unstructured authconfig, got %T", migrateAdUserOperation, authConfigObj)
+	}
+
+	unstructuredMap := storedADConfig.UnstructuredContent()
+	unstructuredMaybeList := unstructuredMap["allowedPrincipalIds"]
+	listOfMaybeStrings, ok := unstructuredMaybeList.([]interface{})
+	if !ok {
+		return fmt.Errorf("[%v] expected list for allowed principal ids, got %T", migrateAdUserOperation, unstructuredMaybeList)
+	}
+
+	adWorkUnitsByPrincipal := map[string]int{}
+	for i, workunit := range *workunits {
+		adWorkUnitsByPrincipal[activeDirectoryPrefix+workunit.guid] = i
+	}
+	missingWorkUnitsByPrincipal := map[string]int{}
+	for i, workunit := range *missingUsers {
+		missingWorkUnitsByPrincipal[activeDirectoryPrefix+workunit.guid] = i
+	}
+
+	// we can deduplicate this list while we're at it, so we don't accidentally end up with twice the DNs
+	var newPrincipalIDs []string
+	var knownDnIDs = map[string]string{}
+
+	for _, item := range listOfMaybeStrings {
+		principalID, ok := item.(string)
+		if !ok {
+			// ... what? we got a non-string?
+			// this is weird enough that we should consider it a hard failure for investigation
+			return fmt.Errorf("[%v] expected string for allowed principal id, found instead %T", migrateAdUserOperation, item)
+		}
+
+		scope, err := getScope(principalID)
+		if err != nil {
+			logrus.Errorf("[%v] found invalid principal ID in allowed user list, refusing to process: %v", migrateAdUserOperation, err)
+			newPrincipalIDs = append(newPrincipalIDs, principalID)
+			continue
+		}
+		if scope != activeDirectoryScope {
+			newPrincipalIDs = append(newPrincipalIDs, principalID)
+		} else {
+			if !isGUID(principalID) {
+				// This must be a DN-based principal; add it to the new list
+				knownDnIDs[principalID] = principalID
+			} else {
+				if j, exists := adWorkUnitsByPrincipal[principalID]; exists {
+					// This user is known and was just migrated to DN, so add their DN-based principal to the list
+					newPrincipalID := activeDirectoryPrefix + (*workunits)[j].distinguishedName
+					knownDnIDs[newPrincipalID] = newPrincipalID
+				} else if _, exists := missingWorkUnitsByPrincipal[principalID]; exists {
+					// This user is known to be missing, so we don't need to perform an LDAP lookup, we can just
+					// action accordingly
+					if !deleteMissingUsers {
+						newPrincipalIDs = append(newPrincipalIDs, principalID)
+					}
+				} else {
+					// We didn't process a user object for this GUID-based user. We need to perform an ldap
+					// lookup on the spot and figure out if they have an associated DN
+					guid, err := getExternalID(principalID)
+					if err != nil {
+						// this shouldn't be reachable, as getScope will fail first, but just for consistency...
+						logrus.Errorf("[%v] found invalid principal ID in allowed user list, refusing to process: %v", migrateAdUserOperation, err)
+						newPrincipalIDs = append(newPrincipalIDs, principalID)
+					} else {
+						dn, err := findDistinguishedNameWithRetries(guid, &sharedLConn, originalAdConfig)
+						if errors.Is(err, LdapConnectionPermanentlyFailed{}) || errors.Is(err, LdapFoundDuplicateGUID{}) {
+							// Whelp; keep this one as-is and yell about it
+							logrus.Errorf("[%v] ldap connection error when checking distinguished name for guid-based principal %v, skipping: %v", migrateAdUserOperation, principalID, err)
+							newPrincipalIDs = append(newPrincipalIDs, principalID)
+						} else if errors.Is(err, LdapErrorNotFound{}) {
+							if !deleteMissingUsers {
+								newPrincipalIDs = append(newPrincipalIDs, principalID)
+							}
+						} else {
+							newPrincipalID := activeDirectoryPrefix + dn
+							knownDnIDs[newPrincipalID] = newPrincipalID
+						}
+					}
+				}
+			}
+		}
+	}
+
+	// Now that we're through processing the list and dealing with any duplicates, append the new DN-based principals
+	// to the end of the list
+	for _, principalID := range knownDnIDs {
+		newPrincipalIDs = append(newPrincipalIDs, principalID)
+	}
+
+	if !dryRun {
+		unstructuredMap["allowedPrincipalIds"] = newPrincipalIDs
+		storedADConfig.SetUnstructuredContent(unstructuredMap)
+
+		_, err = sc.Management.AuthConfigs("").ObjectClient().UnstructuredClient().Update("activedirectory", storedADConfig)
+	} else {
+		logrus.Infof("[%v] DRY RUN: new allowed user list will contain these principal IDs:", migrateAdUserOperation)
+		for _, principalID := range newPrincipalIDs {
+			logrus.Infof("[%v] DRY RUN: '%v'", migrateAdUserOperation, principalID)
+		}
+	}
+	return err
+}
diff --git a/pkg/agent/clean/adunmigration/migrate.go b/pkg/agent/clean/adunmigration/migrate.go
new file mode 100644
index 00000000000..d3810ade7e4
--- /dev/null
+++ b/pkg/agent/clean/adunmigration/migrate.go
@@ -0,0 +1,500 @@
+/*
+Look for any Active Directory users with a GUID-type principal.
GUID type principal. +Convert these users to a distinguished name instead. +*/ + +package adunmigration + +import ( + "context" + "fmt" + "strconv" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + restclient "k8s.io/client-go/rest" + + v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" + "github.com/rancher/rancher/pkg/auth/providers/activedirectory" + "github.com/rancher/rancher/pkg/types/config" +) + +const ( + migrateAdUserOperation = "migrate-ad-user" + identifyAdUserOperation = "identify-ad-users" + migrateTokensOperation = "migrate-ad-tokens" + migrateCrtbsOperation = "migrate-ad-crtbs" + migratePrtbsOperation = "migrate-ad-prtbs" + migrateGrbsOperation = "migrate-ad-grbs" + activeDirectoryScope = "activedirectory_user" + activeDirectoryPrefix = "activedirectory_user://" + localPrefix = "local://" + adGUIDMigrationLabel = "ad-guid-migration" + adGUIDMigrationAnnotation = "ad-guid-migration-data" + adGUIDMigrationPrefix = "migration-" + migratedLabelValue = "migrated" + migrationPreviousName = "ad-guid-previous-name" + AttributeObjectClass = "objectClass" + AttributeObjectGUID = "objectGUID" + migrateStatusSkipped = "skippedUsers" + migrateStatusMissing = "missingUsers" + migrateStatusCountSuffix = "Count" + migrationStatusPercentage = "percentDone" + migrationStatusLastUpdate = "statusLastUpdated" +) + +type migrateUserWorkUnit struct { + distinguishedName string + guid string + originalUser *v3.User + duplicateUsers []*v3.User + + activeDirectoryCRTBs []v3.ClusterRoleTemplateBinding + duplicateLocalCRTBs []v3.ClusterRoleTemplateBinding + + activeDirectoryPRTBs []v3.ProjectRoleTemplateBinding + duplicateLocalPRTBs []v3.ProjectRoleTemplateBinding + + duplicateLocalGRBs []v3.GlobalRoleBinding + + activeDirectoryTokens []v3.Token + duplicateLocalTokens []v3.Token +} + +type missingUserWorkUnit struct { + guid string + originalUser *v3.User + duplicateUsers []*v3.User +} + +type skippedUserWorkUnit struct { + guid string + originalUser *v3.User +} + +func scaledContext(restConfig *restclient.Config) (*config.ScaledContext, error) { + sc, err := config.NewScaledContext(*restConfig, nil) + if err != nil { + logrus.Errorf("[%v] failed to create scaledContext: %v", migrateAdUserOperation, err) + return nil, err + } + + ctx := context.Background() + err = sc.Start(ctx) + if err != nil { + logrus.Errorf("[%v] failed to start scaled context: %v", migrateAdUserOperation, err) + return nil, err + } + + return sc, nil +} + +// UnmigrateAdGUIDUsersOnce will ensure that the migration script will run only once. cycle through all users, ctrb, ptrb, tokens and migrate them to an +// appropriate DN-based PrincipalID. +func UnmigrateAdGUIDUsersOnce(sc *config.ScaledContext) error { + migrationConfigMap, err := sc.Core.ConfigMaps(activedirectory.StatusConfigMapNamespace).GetNamespaced(activedirectory.StatusConfigMapNamespace, activedirectory.StatusConfigMapName, metav1.GetOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + logrus.Errorf("[%v] unable to check unmigration configmap: %v", migrateAdUserOperation, err) + logrus.Errorf("[%v] cannot determine if it is safe to proceed. 
refusing to run", migrateAdUserOperation) + return nil + } + if migrationConfigMap != nil { + migrationStatus := migrationConfigMap.Data[activedirectory.StatusMigrationField] + switch migrationStatus { + case activedirectory.StatusMigrationFinished: + logrus.Debugf("[%v] ad-guid migration has already been completed, refusing to run again at startup", migrateAdUserOperation) + return nil + case activedirectory.StatusMigrationFinishedWithMissing: + logrus.Infof("[%v] ad-guid migration has already been completed. To clean-up missing users, you can run the utility manually", migrateAdUserOperation) + return nil + case activedirectory.StatusMigrationFinishedWithSkipped: + logrus.Infof("[%v] ad-guid migration has already been completed. To try and resolve skipped users, you can run the utility manually", migrateAdUserOperation) + return nil + } + + } + return UnmigrateAdGUIDUsers(&sc.RESTConfig, false, false) +} + +// UnmigrateAdGUIDUsers will cycle through all users, ctrb, ptrb, tokens and migrate them to an +// appropriate DN-based PrincipalID. +func UnmigrateAdGUIDUsers(clientConfig *restclient.Config, dryRun bool, deleteMissingUsers bool) error { + if dryRun { + logrus.Infof("[%v] dryRun is true, no objects will be deleted/modified", migrateAdUserOperation) + deleteMissingUsers = false + } else if deleteMissingUsers { + logrus.Infof("[%v] deleteMissingUsers is true, GUID-based users not present in Active Directory will be deleted", migrateAdUserOperation) + } + + sc, adConfig, err := prepareClientContexts(clientConfig) + if err != nil { + return err + } + + migrationConfigMap, err := sc.Core.ConfigMaps(activedirectory.StatusConfigMapNamespace).GetNamespaced(activedirectory.StatusConfigMapNamespace, activedirectory.StatusConfigMapName, metav1.GetOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + logrus.Errorf("[%v] unable to check unmigration configmap: %v", migrateAdUserOperation, err) + logrus.Errorf("[%v] cannot determine if it is safe to proceed. refusing to run", migrateAdUserOperation) + return nil + } + if migrationConfigMap != nil { + migrationStatus := migrationConfigMap.Data[activedirectory.StatusMigrationField] + switch migrationStatus { + case activedirectory.StatusMigrationRunning: + logrus.Infof("[%v] ad-guid migration is currently running, refusing to run again concurrently", migrateAdUserOperation) + return nil + } + } + + finalStatus := activedirectory.StatusMigrationFinished + + // set the status to running and reset the unmigrated fields + if !dryRun { + err = updateMigrationStatus(sc, activedirectory.StatusMigrationField, activedirectory.StatusMigrationRunning) + if err != nil { + return fmt.Errorf("unable to update migration status configmap: %v", err) + } + updateUnmigratedUsers("", migrateStatusSkipped, true, sc) + updateUnmigratedUsers("", migrateStatusMissing, true, sc) + // If we return past this point, no matter how we got there, make sure we update the configmap to clear the + // status away from "running." If we fail to do this, we block AD-based logins indefinitely. + defer func(sc *config.ScaledContext, status string) { + err := updateMigrationStatus(sc, status, finalStatus) + if err != nil { + logrus.Errorf("[%v] unable to update migration status configmap: %v", migrateAdUserOperation, err) + } + }(sc, activedirectory.StatusMigrationField) + + // Early bail: if the AD configuration is disabled, then we're done! Update the configmap right now and exit. 
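+		// Thanks to the deferred status update above, returning early here still records
+		// finalStatus ("Finished") in the configmap, so a disabled AD setup is not left
+		// stuck in the "Running" state that blocks logins.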
+ if !adConfig.Enabled { + logrus.Infof("[%v] during unmigration, found that Active Directory is not enabled. nothing to do", migrateAdUserOperation) + finalStatus = activedirectory.StatusMigrationFinished + return nil + } + } + + users, err := sc.Management.Users("").List(metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("unable to fetch user list: %v", err) + } + + usersToMigrate, missingUsers, skippedUsers := identifyMigrationWorkUnits(users, adConfig) + // If any of the below functions fail, there is either a permissions problem or a more serious issue with the + // Rancher API. We should bail in this case and not attempt to process users. + err = collectTokens(&usersToMigrate, sc) + if err != nil { + finalStatus = activedirectory.StatusMigrationFailed + return err + } + err = collectCRTBs(&usersToMigrate, sc) + if err != nil { + finalStatus = activedirectory.StatusMigrationFailed + return err + } + err = collectPRTBs(&usersToMigrate, sc) + if err != nil { + finalStatus = activedirectory.StatusMigrationFailed + return err + } + err = collectGRBs(&usersToMigrate, sc) + if err != nil { + finalStatus = activedirectory.StatusMigrationFailed + return err + } + + if len(missingUsers) > 0 { + finalStatus = activedirectory.StatusMigrationFinishedWithMissing + } + if len(skippedUsers) > 0 { + finalStatus = activedirectory.StatusMigrationFinishedWithSkipped + } + + for _, user := range skippedUsers { + logrus.Errorf("[%v] unable to migrate user '%v' due to a connection failure; this user will be skipped", + migrateAdUserOperation, user.originalUser.Name) + if !dryRun { + updateUnmigratedUsers(user.originalUser.Name, migrateStatusSkipped, false, sc) + } + } + for _, missingUser := range missingUsers { + if deleteMissingUsers && !dryRun { + logrus.Infof("[%v] user '%v' with GUID '%v' does not seem to exist in Active Directory. deleteMissingUsers is true, proceeding to delete this user permanently", migrateAdUserOperation, missingUser.originalUser.Name, missingUser.guid) + updateUnmigratedUsers(missingUser.originalUser.Name, migrateStatusMissing, false, sc) + err = sc.Management.Users("").Delete(missingUser.originalUser.Name, &metav1.DeleteOptions{}) + if err != nil { + logrus.Errorf("[%v] failed to delete missing user '%v' with: %v", migrateAdUserOperation, missingUser.originalUser.Name, err) + } + } else { + logrus.Infof("[%v] User '%v' with GUID '%v' does not seem to exist in Active Directory. this user will be skipped", migrateAdUserOperation, missingUser.originalUser.Name, missingUser.guid) + if !dryRun { + updateUnmigratedUsers(missingUser.originalUser.Name, migrateStatusMissing, false, sc) + } + } + } + + for i, userToMigrate := range usersToMigrate { + // Note: some resources may fail to migrate due to webhook constraints; this applies especially to bindings + // that refer to disabled templates, as rancher won't allow us to create the replacements. We'll log these + // errors, but do not consider them to be serious enough to stop processing the remainder of each user's work. 
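+		// For instance, a binding that refers to a RoleTemplate which has since been disabled
+		// will be rejected by the webhook when we try to recreate it; the relevant migrate*
+		// helper logs that failure, and we still continue with the rest of this user's resources.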
+		migrateCRTBs(&userToMigrate, sc, dryRun)
+		migratePRTBs(&userToMigrate, sc, dryRun)
+		migrateGRBs(&userToMigrate, sc, dryRun)
+		migrateTokens(&userToMigrate, sc, dryRun)
+		replaceGUIDPrincipalWithDn(userToMigrate.originalUser, userToMigrate.distinguishedName, userToMigrate.guid, dryRun)
+
+		if dryRun {
+			describePlannedChanges(userToMigrate)
+		} else {
+			err = deleteDuplicateUsers(userToMigrate, sc)
+			if err == nil {
+				updateModifiedUser(userToMigrate, sc)
+			}
+			percentDone := float64(i+1) / float64(len(usersToMigrate)) * 100
+			progress := fmt.Sprintf("%.0f%%", percentDone)
+			err = updateMigrationStatus(sc, migrationStatusPercentage, progress)
+			if err != nil {
+				logrus.Errorf("unable to update migration status: %v", err)
+			}
+		}
+	}
+
+	err = migrateAllowedUserPrincipals(&usersToMigrate, &missingUsers, sc, dryRun, deleteMissingUsers)
+	if err != nil {
+		finalStatus = activedirectory.StatusMigrationFailed
+		return err
+	}
+
+	return nil
+}
+
+// identifyMigrationWorkUnits locates ActiveDirectory users with GUID and DN based principal IDs and sorts them
+// into work units based on whether those users can be located in the upstream Active Directory provider. Specifically:
+//
+// usersToMigrate contains GUID-based original users and any duplicates (GUID or DN based) that we wish to merge
+// missingUsers contains GUID-based users who could not be found in Active Directory
+// skippedUsers contains GUID-based users that could not be processed, usually due to an LDAP connection failure
+func identifyMigrationWorkUnits(users *v3.UserList, adConfig *v3.ActiveDirectoryConfig) (
+	[]migrateUserWorkUnit, []missingUserWorkUnit, []skippedUserWorkUnit) {
+	// Note: we *could* fetch the AD config on the spot here, but we accept it as a parameter specifically
+	// so that this function is easier to test. This setup makes it possible to point the LDAP lookups at a
+	// mock directory and thus more easily exercise unusual Active Directory responses to our searches.
+
+	var usersToMigrate []migrateUserWorkUnit
+	var missingUsers []missingUserWorkUnit
+	var skippedUsers []skippedUserWorkUnit
+
+	// These assist with quickly identifying duplicates, so we don't have to scan the whole structure each time.
+	// We key on guid/dn, and the value is the index of that work unit in the associated table
+	knownGUIDWorkUnits := map[string]int{}
+	knownGUIDMissingUnits := map[string]int{}
+	knownDnWorkUnits := map[string]int{}
+
+	// We'll reuse a shared ldap connection to speed up lookups. We need to declare that here, but we'll defer
+	// starting the connection until the first time a lookup is performed
+	sharedLConn := sharedLdapConnection{}
+
+	// Now we'll make two passes over the list of all users. First we need to identify any GUID-based users, and
+	// sort them into "found" and "not found" lists. At this stage we might have GUID-based duplicates, and we'll
+	// detect and sort those accordingly
+	ldapPermanentlyFailed := false
+	logrus.Debugf("[%v] locating GUID-based Active Directory users", identifyAdUserOperation)
+	for _, user := range users.Items {
+		if !isAdUser(&user) {
+			logrus.Debugf("[%v] user '%v' has no AD principals, skipping", identifyAdUserOperation, user.Name)
+			continue
+		}
+		principalID := adPrincipalID(&user)
+		logrus.Debugf("[%v] processing AD User '%v' with principal ID: '%v'", identifyAdUserOperation, user.Name, principalID)
+		if !isGUID(principalID) {
+			logrus.Debugf("[%v] '%v' does not appear to be a GUID-based principal ID, taking no action", identifyAdUserOperation, principalID)
+			continue
+		}
+		guid, err := getExternalID(principalID)
+		if err != nil {
+			// This really shouldn't be possible to hit, since isGuid will fail to parse anything that would
+			// cause getExternalID to choke on the input, but for maximum safety we'll handle it anyway.
+			logrus.Errorf("[%v] failed to extract GUID from principal '%v', cannot process user '%v': %v", identifyAdUserOperation, principalID, user.Name, err)
+			continue
+		}
+		// If our LDAP connection has gone sour, we still need to log this user for reporting
+		userCopy := user.DeepCopy()
+		if ldapPermanentlyFailed {
+			skippedUsers = append(skippedUsers, skippedUserWorkUnit{guid: guid, originalUser: userCopy})
+		} else {
+			// Check for guid-based duplicates here. If we find one, we don't need to perform another LDAP lookup.
+			if i, exists := knownGUIDWorkUnits[guid]; exists {
+				logrus.Debugf("[%v] user %v is GUID-based (%v) and a duplicate of %v",
+					identifyAdUserOperation, user.Name, guid, usersToMigrate[i].originalUser.Name)
+				// Make sure the oldest duplicate user is selected as the original
+				if usersToMigrate[i].originalUser.CreationTimestamp.Time.After(user.CreationTimestamp.Time) {
+					usersToMigrate[i].duplicateUsers = append(usersToMigrate[i].duplicateUsers, usersToMigrate[i].originalUser)
+					usersToMigrate[i].originalUser = userCopy
+				} else {
+					usersToMigrate[i].duplicateUsers = append(usersToMigrate[i].duplicateUsers, userCopy)
+				}
+				continue
+			}
+			if i, exists := knownGUIDMissingUnits[guid]; exists {
+				logrus.Debugf("[%v] user %v is GUID-based (%v) and a duplicate of %v which is known to be missing",
+					identifyAdUserOperation, user.Name, guid, missingUsers[i].originalUser.Name)
+				// We're less picky about the age of the oldest user here, because we aren't going to deduplicate these
+				missingUsers[i].duplicateUsers = append(missingUsers[i].duplicateUsers, userCopy)
+				continue
+			}
+			dn, err := findDistinguishedNameWithRetries(guid, &sharedLConn, adConfig)
+			if errors.Is(err, LdapConnectionPermanentlyFailed{}) {
+				logrus.Warnf("[%v] LDAP connection has permanently failed! will continue to migrate previously identified users", identifyAdUserOperation)
+				skippedUsers = append(skippedUsers, skippedUserWorkUnit{guid: guid, originalUser: userCopy})
+				ldapPermanentlyFailed = true
+			} else if errors.Is(err, LdapFoundDuplicateGUID{}) {
+				logrus.Errorf("[%v] LDAP returned multiple users with GUID '%v'. this should not be possible, and may indicate a configuration error! this user will be skipped", identifyAdUserOperation, guid)
+				skippedUsers = append(skippedUsers, skippedUserWorkUnit{guid: guid, originalUser: userCopy})
+			} else if errors.Is(err, LdapErrorNotFound{}) {
+				logrus.Debugf("[%v] user %v is GUID-based (%v) and the Active Directory server doesn't know about it. marking it as missing", identifyAdUserOperation, user.Name, guid)
+				knownGUIDMissingUnits[guid] = len(missingUsers)
+				missingUsers = append(missingUsers, missingUserWorkUnit{guid: guid, originalUser: userCopy})
+			} else {
+				logrus.Debugf("[%v] user %v is GUID-based (%v) and the Active Directory server knows it by the Distinguished Name '%v'", identifyAdUserOperation, user.Name, guid, dn)
+				knownGUIDWorkUnits[guid] = len(usersToMigrate)
+				knownDnWorkUnits[dn] = len(usersToMigrate)
+				var emptyDuplicateList []*v3.User
+				usersToMigrate = append(usersToMigrate, migrateUserWorkUnit{guid: guid, distinguishedName: dn, originalUser: userCopy, duplicateUsers: emptyDuplicateList})
+			}
+		}
+	}
+
+	if sharedLConn.isOpen {
+		sharedLConn.lConn.Close()
+	}
+
+	if len(usersToMigrate) == 0 {
+		logrus.Debugf("[%v] found 0 users in need of migration, exiting without checking for DN-based duplicates", identifyAdUserOperation)
+		return usersToMigrate, missingUsers, skippedUsers
+	}
+
+	// Now for the second pass, we need to identify DN-based users, and see if they are duplicates of any of the GUID
+	// users that we found in the first pass. We'll prefer the oldest user as the originalUser object; this will be
+	// the one we keep when we resolve duplicates later.
+	logrus.Debugf("[%v] locating any DN-based Active Directory users", identifyAdUserOperation)
+	for _, user := range users.Items {
+		if !isAdUser(&user) {
+			logrus.Debugf("[%v] user '%v' has no AD principals, skipping", identifyAdUserOperation, user.Name)
+			continue
+		}
+		principalID := adPrincipalID(&user)
+		logrus.Debugf("[%v] processing AD User '%v' with principal ID: '%v'", identifyAdUserOperation, user.Name, principalID)
+		if isGUID(principalID) {
+			logrus.Debugf("[%v] '%v' does not appear to be a DN-based principal ID, taking no action", identifyAdUserOperation, principalID)
+			continue
+		}
+		dn, err := getExternalID(principalID)
+		if err != nil {
+			logrus.Errorf("[%v] failed to extract DN from principal '%v', cannot process user '%v': %v", identifyAdUserOperation, principalID, user.Name, err)
+			continue
+		}
+		if i, exists := knownDnWorkUnits[dn]; exists {
+			logrus.Debugf("[%v] user %v is DN-based (%v), and a duplicate of %v",
+				identifyAdUserOperation, user.Name, dn, usersToMigrate[i].originalUser.Name)
+			// Make sure the oldest duplicate user is selected as the original
+			userCopy := user.DeepCopy()
+			if usersToMigrate[i].originalUser.CreationTimestamp.Time.After(user.CreationTimestamp.Time) {
+				usersToMigrate[i].duplicateUsers = append(usersToMigrate[i].duplicateUsers, usersToMigrate[i].originalUser)
+				usersToMigrate[i].originalUser = userCopy
+			} else {
+				usersToMigrate[i].duplicateUsers = append(usersToMigrate[i].duplicateUsers, userCopy)
+			}
+		}
+	}
+
+	return usersToMigrate, missingUsers, skippedUsers
+}
+
+func workUnitContainsName(workunit *migrateUserWorkUnit, name string) bool {
+	if workunit.originalUser.Name == name {
+		return true
+	}
+	for _, duplicateLocalUser := range workunit.duplicateUsers {
+		if duplicateLocalUser.Name == name {
+			return true
+		}
+	}
+	return false
+}
+
+func updateMigrationStatus(sc *config.ScaledContext, status string, value string) error {
+	cm, err := sc.Core.ConfigMaps(activedirectory.StatusConfigMapNamespace).Get(activedirectory.StatusConfigMapName, metav1.GetOptions{})
+	if err != nil {
+		// Create a new ConfigMap if it doesn't exist
+		if !apierrors.IsNotFound(err) {
+			return err
+		}
+		cm = &corev1.ConfigMap{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      activedirectory.StatusConfigMapName,
+				Namespace: activedirectory.StatusConfigMapNamespace,
+			},
+		}
+	}
+	if cm.Data == nil {
+		cm.Data = map[string]string{}
+	}
+	cm.Data[status] = value
+	cm.Data[migrationStatusLastUpdate] = metav1.Now().Format(time.RFC3339)
+
+	if _, err := sc.Core.ConfigMaps(activedirectory.StatusConfigMapNamespace).Update(cm); err != nil {
+		// If the ConfigMap does not exist, create it; any other update failure is a real error
+		if !apierrors.IsNotFound(err) {
+			return fmt.Errorf("[%v] unable to update migration status configmap: %v", migrateAdUserOperation, err)
+		}
+		if _, err := sc.Core.ConfigMaps(activedirectory.StatusConfigMapNamespace).Create(cm); err != nil {
+			return fmt.Errorf("[%v] unable to create migration status configmap: %v", migrateAdUserOperation, err)
+		}
+	}
+	err = updateADConfigMigrationStatus(cm.Data, sc)
+	if err != nil {
+		return fmt.Errorf("unable to update AuthConfig status: %v", err)
+	}
+	return nil
+}
+
+// updateUnmigratedUsers will add a user to the list for the specified migration status in the migration status configmap.
+// If reset is set to true, it will empty the list.
+func updateUnmigratedUsers(user string, status string, reset bool, sc *config.ScaledContext) {
+	cm, err := sc.Core.ConfigMaps(activedirectory.StatusConfigMapNamespace).Get(activedirectory.StatusConfigMapName, metav1.GetOptions{})
+	if err != nil {
+		logrus.Errorf("[%v] unable to fetch configmap to update %v users: %v", migrateAdUserOperation, status, err)
+		// without the configmap there is nothing to update, and dereferencing cm below would panic
+		return
+	}
+	if reset {
+		delete(cm.Data, status)
+		delete(cm.Data, status+migrateStatusCountSuffix)
+	} else {
+		currentList := cm.Data[status]
+		if currentList == "" {
+			currentList = user
+		} else {
+			currentList = currentList + "," + user
+		}
+		cm.Data[status+migrateStatusCountSuffix] = strconv.Itoa(len(strings.Split(currentList, ",")))
+		cm.Data[status] = currentList
+	}
+
+	cm.Data[migrationStatusLastUpdate] = metav1.Now().Format(time.RFC3339)
+	if _, err := sc.Core.ConfigMaps(activedirectory.StatusConfigMapNamespace).Update(cm); err != nil {
+		logrus.Errorf("[%v] unable to update migration status configmap: %v", migrateAdUserOperation, err)
+	}
+	err = updateADConfigMigrationStatus(cm.Data, sc)
+	if err != nil {
+		logrus.Errorf("unable to update AuthConfig status: %v", err)
+	}
+}
diff --git a/pkg/agent/clean/adunmigration/rtbs.go b/pkg/agent/clean/adunmigration/rtbs.go
new file mode 100644
index 00000000000..9e097fdfe8a
--- /dev/null
+++ b/pkg/agent/clean/adunmigration/rtbs.go
@@ -0,0 +1,404 @@
+package adunmigration
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/sirupsen/logrus"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+
+	v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
+	v3norman "github.com/rancher/rancher/pkg/generated/norman/management.cattle.io/v3"
+	"github.com/rancher/rancher/pkg/types/config"
+)
+
+// principalsToMigrate collects workunits whose resources we wish to migrate into two groups:
+//
+// adWorkUnitsByPrincipal - resources should be migrated to an ActiveDirectory principal with a Distinguished Name
+// duplicateLocalWorkUnitsByPrincipal - resources should be migrated to the local ID of the original (kept) user
+func principalsToMigrate(workunits *[]migrateUserWorkUnit) (adWorkUnitsByPrincipal map[string]int, duplicateLocalWorkUnitsByPrincipal map[string]int) {
+	// first build a map of guid-principalid -> work unit, which will make the following logic more efficient
+	adWorkUnitsByPrincipal = map[string]int{}
+	duplicateLocalWorkUnitsByPrincipal = map[string]int{}
+
+	for i, workunit := range *workunits
{ + adWorkUnitsByPrincipal[activeDirectoryPrefix+workunit.guid] = i + for j := range workunit.duplicateUsers { + duplicateLocalWorkUnitsByPrincipal[activeDirectoryPrefix+workunit.guid] = i + duplicateLocalWorkUnitsByPrincipal[activeDirectoryPrefix+workunit.distinguishedName] = i + duplicateLocalWorkUnitsByPrincipal[localPrefix+workunit.duplicateUsers[j].Name] = i + } + } + + return adWorkUnitsByPrincipal, duplicateLocalWorkUnitsByPrincipal +} + +func collectCRTBs(workunits *[]migrateUserWorkUnit, sc *config.ScaledContext) error { + crtbInterface := sc.Management.ClusterRoleTemplateBindings("") + crtbList, err := crtbInterface.List(metav1.ListOptions{}) + if err != nil { + logrus.Errorf("[%v] unable to fetch CRTB objects: %v", migrateAdUserOperation, err) + return err + } + + adWorkUnitsByPrincipal, duplicateLocalWorkUnitsByPrincipal := principalsToMigrate(workunits) + + for _, crtb := range crtbList.Items { + if index, exists := adWorkUnitsByPrincipal[crtb.UserPrincipalName]; exists { + if workUnitContainsName(&(*workunits)[index], crtb.UserName) { + (*workunits)[index].activeDirectoryCRTBs = append((*workunits)[index].activeDirectoryCRTBs, crtb) + } else { + logrus.Warnf("[%v] found CRTB for user with guid-based principal '%v' and name '%v', but no user object with that name matches the GUID or its associated DN. refusing to process", + identifyAdUserOperation, crtb.UserPrincipalName, crtb.UserName) + } + } else if index, exists = duplicateLocalWorkUnitsByPrincipal[crtb.UserPrincipalName]; exists { + if workUnitContainsName(&(*workunits)[index], crtb.UserName) { + (*workunits)[index].duplicateLocalCRTBs = append((*workunits)[index].duplicateLocalCRTBs, crtb) + } else { + logrus.Warnf("[%v] found CRTB for user with guid-based principal '%v' and name '%v', but no user object with that name matches the GUID or its associated DN. refusing to process", + identifyAdUserOperation, crtb.UserPrincipalName, crtb.UserName) + } + } + } + + return nil +} + +func collectPRTBs(workunits *[]migrateUserWorkUnit, sc *config.ScaledContext) error { + prtbInterface := sc.Management.ProjectRoleTemplateBindings("") + prtbList, err := prtbInterface.List(metav1.ListOptions{}) + if err != nil { + logrus.Errorf("[%v] unable to fetch PRTB objects: %v", migrateAdUserOperation, err) + return err + } + + adWorkUnitsByPrincipal, duplicateLocalWorkUnitsByPrincipal := principalsToMigrate(workunits) + + for _, prtb := range prtbList.Items { + if index, exists := adWorkUnitsByPrincipal[prtb.UserPrincipalName]; exists { + if workUnitContainsName(&(*workunits)[index], prtb.UserName) { + (*workunits)[index].activeDirectoryPRTBs = append((*workunits)[index].activeDirectoryPRTBs, prtb) + } else { + logrus.Warnf("[%v] found PRTB for user with guid-based principal '%v' and name '%v', but no user object with that name matches the GUID or its associated DN. refusing to process", + identifyAdUserOperation, prtb.UserPrincipalName, prtb.UserName) + } + } else if index, exists = duplicateLocalWorkUnitsByPrincipal[prtb.UserPrincipalName]; exists { + if workUnitContainsName(&(*workunits)[index], prtb.UserName) { + (*workunits)[index].duplicateLocalPRTBs = append((*workunits)[index].duplicateLocalPRTBs, prtb) + } else { + logrus.Warnf("[%v] found PRTB for user with guid-based principal '%v' and name '%v', but no user object with that name matches the GUID or its associated DN. 
refusing to process",
+					identifyAdUserOperation, prtb.UserPrincipalName, prtb.UserName)
+			}
+		}
+	}
+
+	return nil
+}
+
+func collectGRBs(workunits *[]migrateUserWorkUnit, sc *config.ScaledContext) error {
+	grbInterface := sc.Management.GlobalRoleBindings("")
+	grbList, err := grbInterface.List(metav1.ListOptions{})
+	if err != nil {
+		logrus.Errorf("[%v] unable to fetch GRB objects: %v", migrateAdUserOperation, err)
+		return err
+	}
+
+	duplicateLocalWorkUnitsByName := map[string]int{}
+
+	for i, workunit := range *workunits {
+		for j := range workunit.duplicateUsers {
+			// map each duplicate user's name to the index of its work unit, since that index
+			// is what we use to address (*workunits) below
+			duplicateLocalWorkUnitsByName[workunit.duplicateUsers[j].Name] = i
+		}
+	}
+
+	for _, grb := range grbList.Items {
+		if index, exists := duplicateLocalWorkUnitsByName[grb.UserName]; exists {
+			(*workunits)[index].duplicateLocalGRBs = append((*workunits)[index].duplicateLocalGRBs, grb)
+		}
+	}
+
+	return nil
+}
+
+func updateCRTB(crtbInterface v3norman.ClusterRoleTemplateBindingInterface, oldCrtb *v3.ClusterRoleTemplateBinding, userName string, principalID string) error {
+	newAnnotations := oldCrtb.Annotations
+	if newAnnotations == nil {
+		newAnnotations = make(map[string]string)
+	}
+	newAnnotations[adGUIDMigrationAnnotation] = oldCrtb.UserPrincipalName
+	newLabels := oldCrtb.Labels
+	if newLabels == nil {
+		newLabels = make(map[string]string)
+	}
+	newLabels[migrationPreviousName] = oldCrtb.Name
+	newLabels[adGUIDMigrationLabel] = migratedLabelValue
+	newCrtb := &v3.ClusterRoleTemplateBinding{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:         "",
+			Namespace:    oldCrtb.ObjectMeta.Namespace,
+			GenerateName: "crtb-",
+			Annotations:  newAnnotations,
+			Labels:       newLabels,
+		},
+		ClusterName:       oldCrtb.ClusterName,
+		UserName:          userName,
+		RoleTemplateName:  oldCrtb.RoleTemplateName,
+		UserPrincipalName: principalID,
+	}
+
+	// If we get an internal error during any of these ops, there's a good chance the webhook is overwhelmed.
+	// We'll take the opportunity to rate limit ourselves and try again a few times.
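+	// Roughly, ignoring jitter: Duration=5s, Factor=1.1 and Steps=10 allow up to ten
+	// attempts, sleeping about 5s, 5.5s, 6.05s, ... between them, for a total of around
+	// 68 seconds of waiting before we give up. Note the ConditionFunc contract used below:
+	// returning (false, nil) tells wait.ExponentialBackoff to sleep and retry, while
+	// returning a non-nil error aborts the backoff immediately.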
+	backoff := wait.Backoff{
+		Duration: 5 * time.Second,
+		Factor:   1.1,
+		Jitter:   0.1,
+		Steps:    10,
+	}
+
+	err := wait.ExponentialBackoff(backoff, func() (finished bool, err error) {
+		_, err = crtbInterface.Create(newCrtb)
+		if err != nil {
+			if apierrors.IsInternalError(err) {
+				logrus.Errorf("[%v] internal error while creating crtb, will backoff and retry: %v", migrateCrtbsOperation, err)
+				// a nil error here keeps the backoff retrying; a non-nil error would abort it
+				return false, nil
+			}
+			return true, fmt.Errorf("[%v] unable to create new CRTB: %w", migrateCrtbsOperation, err)
+		}
+		return true, nil
+	})
+	if err != nil {
+		return fmt.Errorf("[%v] permanent error when creating crtb, giving up: %v", migrateCrtbsOperation, err)
+	}
+
+	err = wait.ExponentialBackoff(backoff, func() (finished bool, err error) {
+		err = crtbInterface.DeleteNamespaced(oldCrtb.Namespace, oldCrtb.Name, &metav1.DeleteOptions{})
+		if err != nil {
+			if apierrors.IsInternalError(err) {
+				logrus.Errorf("[%v] internal error while deleting crtb, will backoff and retry: %v", migrateCrtbsOperation, err)
+				return false, nil
+			}
+			return true, fmt.Errorf("[%v] unable to delete old CRTB: %w", migrateCrtbsOperation, err)
+		}
+		return true, nil
+	})
+	if err != nil {
+		return fmt.Errorf("[%v] permanent error when deleting crtb, giving up: %v", migrateCrtbsOperation, err)
+	}
+
+	return nil
+}
+
+func migrateCRTBs(workunit *migrateUserWorkUnit, sc *config.ScaledContext, dryRun bool) {
+	crtbInterface := sc.Management.ClusterRoleTemplateBindings("")
+	// First convert all GUID-based CRTBs to their equivalent Distinguished Name variants
+	dnPrincipalID := activeDirectoryPrefix + workunit.distinguishedName
+	for _, oldCrtb := range workunit.activeDirectoryCRTBs {
+		if dryRun {
+			logrus.Infof("[%v] DRY RUN: would migrate CRTB '%v' from GUID principal '%v' to DN principal '%v'. "+
+				"Additionally, an annotation, %v, would be added containing the principal being migrated from, and "+
+				"labels, %v and %v, that will contain the name of the previous CRTB and indicate that this CRTB has been migrated.",
+				migrateCrtbsOperation, oldCrtb.Name, oldCrtb.UserPrincipalName, dnPrincipalID, adGUIDMigrationAnnotation, migrationPreviousName, adGUIDMigrationLabel)
+		} else {
+			err := updateCRTB(crtbInterface, &oldCrtb, workunit.originalUser.Name, dnPrincipalID)
+			if err != nil {
+				logrus.Errorf("[%v] error while migrating CRTBs for user '%v': %v", migrateCrtbsOperation, workunit.originalUser.Name, err)
+			}
+		}
+	}
+	// Now do the same for Local ID bindings on the users we are about to delete, pointing them instead to the merged
+	// original user that we will be keeping
+	localPrincipalID := localPrefix + workunit.originalUser.Name
+	for _, oldCrtb := range workunit.duplicateLocalCRTBs {
+		if dryRun {
+			logrus.Infof("[%v] DRY RUN: would migrate CRTB '%v' from duplicate local user '%v' to original user '%v'. "+
+				"Additionally, an annotation, %v, would be added containing the principal being migrated from, and "+
+				"labels, %v and %v, that will contain the name of the previous CRTB and indicate that this CRTB has been migrated.",
+				migrateCrtbsOperation, oldCrtb.Name, oldCrtb.UserPrincipalName, localPrincipalID, adGUIDMigrationAnnotation, migrationPreviousName, adGUIDMigrationLabel)
+		} else {
+			err := updateCRTB(crtbInterface, &oldCrtb, workunit.originalUser.Name, localPrincipalID)
+			if err != nil {
+				logrus.Errorf("[%v] error while migrating crtbs for user '%v': %v", migrateCrtbsOperation, workunit.originalUser.Name, err)
+			}
+		}
+	}
+}
+
+func updatePRTB(prtbInterface v3norman.ProjectRoleTemplateBindingInterface, oldPrtb *v3.ProjectRoleTemplateBinding, userName string, principalID string) error {
+	newAnnotations := oldPrtb.Annotations
+	if newAnnotations == nil {
+		newAnnotations = make(map[string]string)
+	}
+	newAnnotations[adGUIDMigrationAnnotation] = oldPrtb.UserPrincipalName
+	newLabels := oldPrtb.Labels
+	if newLabels == nil {
+		newLabels = make(map[string]string)
+	}
+	newLabels[migrationPreviousName] = oldPrtb.Name
+	newLabels[adGUIDMigrationLabel] = migratedLabelValue
+	newPrtb := &v3.ProjectRoleTemplateBinding{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:         "",
+			Namespace:    oldPrtb.ObjectMeta.Namespace,
+			GenerateName: "prtb-",
+			Annotations:  newAnnotations,
+			Labels:       newLabels,
+		},
+		ProjectName:       oldPrtb.ProjectName,
+		UserName:          userName,
+		RoleTemplateName:  oldPrtb.RoleTemplateName,
+		UserPrincipalName: principalID,
+	}
+
+	// If we get an internal error during any of these ops, there's a good chance the webhook is overwhelmed.
+	// We'll take the opportunity to rate limit ourselves and try again a few times; see the schedule notes
+	// on updateCRTB above.
+	backoff := wait.Backoff{
+		Duration: 5 * time.Second,
+		Factor:   1.1,
+		Jitter:   0.1,
+		Steps:    10,
+	}
+
+	err := wait.ExponentialBackoff(backoff, func() (finished bool, err error) {
+		_, err = prtbInterface.Create(newPrtb)
+		if err != nil {
+			if apierrors.IsInternalError(err) {
+				logrus.Errorf("[%v] internal error while creating prtb, will backoff and retry: %v", migratePrtbsOperation, err)
+				return false, nil
+			}
+			return true, fmt.Errorf("[%v] unable to create new PRTB: %w", migratePrtbsOperation, err)
+		}
+		return true, nil
+	})
+	if err != nil {
+		return fmt.Errorf("[%v] permanent error when creating prtb, giving up: %v", migratePrtbsOperation, err)
+	}
+
+	err = wait.ExponentialBackoff(backoff, func() (finished bool, err error) {
+		err = prtbInterface.DeleteNamespaced(oldPrtb.Namespace, oldPrtb.Name, &metav1.DeleteOptions{})
+		if err != nil {
+			if apierrors.IsInternalError(err) {
+				logrus.Errorf("[%v] internal error while deleting prtb, will backoff and retry: %v", migratePrtbsOperation, err)
+				return false, nil
+			}
+			return true, fmt.Errorf("[%v] unable to delete old PRTB: %w", migratePrtbsOperation, err)
+		}
+		return true, nil
+	})
+	if err != nil {
+		return fmt.Errorf("[%v] permanent error when deleting prtb, giving up: %v", migratePrtbsOperation, err)
+	}
+
+	return nil
+}
+
+func migratePRTBs(workunit *migrateUserWorkUnit, sc *config.ScaledContext, dryRun bool) {
+	prtbInterface := sc.Management.ProjectRoleTemplateBindings("")
+	// First convert all GUID-based PRTBs to their equivalent Distinguished Name variants
+	dnPrincipalID := activeDirectoryPrefix + workunit.distinguishedName
+	for _, oldPrtb := range workunit.activeDirectoryPRTBs {
+		if dryRun {
+			logrus.Infof("[%v] DRY RUN: would migrate PRTB '%v' from GUID principal '%v' to DN principal '%v'. "+
+				"Additionally, an annotation, %v, would be added containing the principal being migrated from, and "+
+				"labels, %v and %v, that will contain the name of the previous PRTB and indicate that this PRTB has been migrated.",
+				migratePrtbsOperation, oldPrtb.Name, oldPrtb.UserPrincipalName, dnPrincipalID, adGUIDMigrationAnnotation, migrationPreviousName, adGUIDMigrationLabel)
+		} else {
+			err := updatePRTB(prtbInterface, &oldPrtb, workunit.originalUser.Name, dnPrincipalID)
+			if err != nil {
+				logrus.Errorf("[%v] error while migrating prtbs for user '%v': %v", migratePrtbsOperation, workunit.originalUser.Name, err)
+			}
+		}
+	}
+	// Now do the same for Local ID bindings on the users we are about to delete, pointing them instead to the merged
+	// original user that we will be keeping
+	localPrincipalID := localPrefix + workunit.originalUser.Name
+	for _, oldPrtb := range workunit.duplicateLocalPRTBs {
+		if dryRun {
+			logrus.Infof("[%v] DRY RUN: would migrate PRTB '%v' from duplicate local user '%v' to original user '%v'. "+
+				"Additionally, an annotation, %v, would be added containing the principal being migrated from, and "+
+				"labels, %v and %v, that will contain the name of the previous PRTB and indicate that this PRTB has been migrated.",
+				migratePrtbsOperation, oldPrtb.Name, oldPrtb.UserPrincipalName, localPrincipalID, adGUIDMigrationAnnotation, migrationPreviousName, adGUIDMigrationLabel)
+		} else {
+			err := updatePRTB(prtbInterface, &oldPrtb, workunit.originalUser.Name, localPrincipalID)
+			if err != nil {
+				logrus.Errorf("[%v] error while migrating prtbs for user '%v': %v", migratePrtbsOperation, workunit.originalUser.Name, err)
+			}
+		}
+	}
+}
+
+func migrateGRBs(workunit *migrateUserWorkUnit, sc *config.ScaledContext, dryRun bool) {
+	grbInterface := sc.Management.GlobalRoleBindings("")
+
+	backoff := wait.Backoff{
+		Duration: 5 * time.Second,
+		Factor:   1.1,
+		Jitter:   0.1,
+		Steps:    10,
+	}
+
+	for _, oldGrb := range workunit.duplicateLocalGRBs {
+		if dryRun {
+			logrus.Infof("[%v] DRY RUN: would migrate GRB '%v' from duplicate local user '%v' to original user '%v'. "+
+				"Additionally, labels %v and %v will be added. These contain the name of the previous GRB and indicate that this GRB has been migrated.",
+				migrateGrbsOperation, oldGrb.Name, oldGrb.UserName, workunit.originalUser.Name, migrationPreviousName, adGUIDMigrationLabel)
+			continue
+		}
+		newLabels := oldGrb.Labels
+		if newLabels == nil {
+			newLabels = make(map[string]string)
+		}
+		newLabels[migrationPreviousName] = oldGrb.Name
+		newLabels[adGUIDMigrationLabel] = migratedLabelValue
+
+		newGrb := &v3.GlobalRoleBinding{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:         "",
+				GenerateName: "grb-",
+				Annotations:  oldGrb.Annotations,
+				Labels:       newLabels,
+			},
+			GlobalRoleName:     oldGrb.GlobalRoleName,
+			GroupPrincipalName: oldGrb.GroupPrincipalName,
+			UserName:           workunit.originalUser.Name,
+		}
+
+		err := wait.ExponentialBackoff(backoff, func() (finished bool, err error) {
+			_, err = grbInterface.Create(newGrb)
+			if err != nil {
+				if apierrors.IsInternalError(err) {
+					logrus.Errorf("[%v] internal error while creating GRB, will backoff and retry: %v", migrateGrbsOperation, err)
+					return false, nil
+				}
+				return true, fmt.Errorf("[%v] unable to create new GRB: %w", migrateGrbsOperation, err)
+			}
+			return true, nil
+		})
+		if err != nil {
+			logrus.Errorf("[%v] permanent error while creating GRB, giving up: %v", migrateGrbsOperation, err)
+			continue
+		}
+
+		err = wait.ExponentialBackoff(backoff, func() (finished bool, err error) {
+			err = grbInterface.Delete(oldGrb.Name, &metav1.DeleteOptions{})
+			if err != nil {
+				if apierrors.IsInternalError(err) {
+					logrus.Errorf("[%v] internal error while deleting GRB, will backoff and retry: %v", migrateGrbsOperation, err)
+					return false, nil
+				}
+				return true, fmt.Errorf("[%v] unable to delete old GRB: %w", migrateGrbsOperation, err)
+			}
+			return true, nil
+		})
+		if err != nil {
+			logrus.Errorf("[%v] permanent error when deleting GRB, giving up: %v", migrateGrbsOperation, err)
+		}
+	}
+}
diff --git a/pkg/agent/clean/adunmigration/tokens.go b/pkg/agent/clean/adunmigration/tokens.go
new file mode 100644
index 00000000000..c24f1d34192
--- /dev/null
+++ b/pkg/agent/clean/adunmigration/tokens.go
@@ -0,0 +1,131 @@
+package adunmigration
+
+import (
+	"fmt"
+	"time"
+
+	v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
+	v3norman "github.com/rancher/rancher/pkg/generated/norman/management.cattle.io/v3"
+	"github.com/sirupsen/logrus"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+
+	"github.com/rancher/rancher/pkg/auth/tokens"
+	"github.com/rancher/rancher/pkg/types/config"
+)
+
+func collectTokens(workunits *[]migrateUserWorkUnit, sc *config.ScaledContext) error {
+	tokenInterface := sc.Management.Tokens("")
+	tokenList, err := tokenInterface.List(metav1.ListOptions{})
+	if err != nil {
+		logrus.Errorf("[%v] unable to fetch token objects: %v", migrateAdUserOperation, err)
+		return err
+	}
+
+	adWorkUnitsByPrincipal, duplicateLocalWorkUnitsByPrincipal := principalsToMigrate(workunits)
+
+	for _, token := range tokenList.Items {
+		if index, exists := adWorkUnitsByPrincipal[token.UserPrincipal.Name]; exists {
+			if workUnitContainsName(&(*workunits)[index], token.UserID) {
+				(*workunits)[index].activeDirectoryTokens = append((*workunits)[index].activeDirectoryTokens, token)
+			} else {
+				logrus.Warnf("[%v] found token for user with guid-based principal '%v' and name '%v', but no user object with that name matches the GUID or its associated DN. refusing to process",
+					identifyAdUserOperation, token.UserPrincipal.Name, token.UserID)
+			}
+		} else if index, exists = duplicateLocalWorkUnitsByPrincipal[token.UserPrincipal.Name]; exists {
+			if workUnitContainsName(&(*workunits)[index], token.UserID) {
+				(*workunits)[index].duplicateLocalTokens = append((*workunits)[index].duplicateLocalTokens, token)
+			} else {
+				logrus.Warnf("[%v] found token for user with guid-based principal '%v' and name '%v', but no user object with that name matches the GUID or its associated DN. refusing to process",
+					identifyAdUserOperation, token.UserPrincipal.Name, token.UserID)
+			}
+		}
+	}
+
+	return nil
+}
+
+func updateToken(tokenInterface v3norman.TokenInterface, userToken v3.Token, newPrincipalID string, guid string, originalUser *v3.User) error {
+	latestToken, err := tokenInterface.Get(userToken.Name, metav1.GetOptions{})
+	if err != nil {
+		if apierrors.IsNotFound(err) {
+			logrus.Errorf("[%v] token %v no longer exists: %v", migrateTokensOperation, userToken.Name, err)
+			return nil
+		}
+		return fmt.Errorf("[%v] unable to fetch token %v: %w", migrateTokensOperation, userToken.Name, err)
+	}
+	if latestToken.Annotations == nil {
+		latestToken.Annotations = make(map[string]string)
+	}
+	latestToken.Annotations[adGUIDMigrationAnnotation] = guid
+	if latestToken.Labels == nil {
+		latestToken.Labels = make(map[string]string)
+	}
+	latestToken.Labels[tokens.UserIDLabel] = originalUser.Name
+	latestToken.Labels[adGUIDMigrationLabel] = migratedLabelValue
+	// point the token's principal at the new principal ID
+	latestToken.UserPrincipal.Name = newPrincipalID
+	// copy over other relevant fields to match the originalUser we want to keep
+	latestToken.UserPrincipal.UID = originalUser.UID
+	latestToken.UserPrincipal.LoginName = originalUser.Username
+	latestToken.UserPrincipal.DisplayName = originalUser.DisplayName
+	latestToken.UserID = originalUser.Name
+
+	// If we get an internal error during any of these ops, there's a good chance the webhook is overwhelmed.
+	// We'll take the opportunity to rate limit ourselves and try again a few times.
+	backoff := wait.Backoff{
+		Duration: 5 * time.Second,
+		Factor:   1.1,
+		Jitter:   0.1,
+		Steps:    10,
+	}
+
+	err = wait.ExponentialBackoff(backoff, func() (finished bool, err error) {
+		_, err = tokenInterface.Update(latestToken)
+		if err != nil {
+			if apierrors.IsInternalError(err) {
+				logrus.Errorf("[%v] internal error while updating token, will backoff and retry: %v", migrateTokensOperation, err)
+				return false, nil
+			}
+			return true, fmt.Errorf("[%v] unable to update token: %w", migrateTokensOperation, err)
+		}
+		return true, nil
+	})
+	if err != nil {
+		return fmt.Errorf("[%v] permanent error when updating token, giving up: %v", migrateTokensOperation, err)
+	}
+
+	return nil
+}
+
+func migrateTokens(workunit *migrateUserWorkUnit, sc *config.ScaledContext, dryRun bool) {
+	tokenInterface := sc.Management.Tokens("")
+	dnPrincipalID := activeDirectoryPrefix + workunit.distinguishedName
+	for _, userToken := range workunit.activeDirectoryTokens {
+		if dryRun {
+			logrus.Infof("[%v] DRY RUN: would migrate token '%v' from GUID principal '%v' to DN principal '%v'. "+
+				"Additionally, it would add an annotation, %v, indicating the former principalID of this token "+
+				"and a label, %v, to indicate that this token has been migrated",
+				migrateTokensOperation, userToken.Name, userToken.UserPrincipal.Name, dnPrincipalID, adGUIDMigrationAnnotation, adGUIDMigrationLabel)
+		} else {
+			err := updateToken(tokenInterface, userToken, dnPrincipalID, workunit.guid, workunit.originalUser)
+			if err != nil {
+				logrus.Errorf("[%v] error while migrating tokens for user '%v': %v", migrateTokensOperation, workunit.originalUser.Name, err)
+			}
+		}
+	}
+
+	localPrincipalID := localPrefix + workunit.originalUser.Name
+	for _, userToken := range workunit.duplicateLocalTokens {
+		if dryRun {
+			logrus.Infof("[%v] DRY RUN: would migrate token '%v' from duplicate local user '%v' to original user '%v'. "+
+				"Would add annotation, %v, and label, %v, to indicate migration status",
+				migrateTokensOperation, userToken.Name, userToken.UserPrincipal.Name, localPrincipalID, adGUIDMigrationAnnotation, adGUIDMigrationLabel)
+		} else {
+			err := updateToken(tokenInterface, userToken, localPrincipalID, workunit.guid, workunit.originalUser)
+			if err != nil {
+				logrus.Errorf("[%v] error while migrating tokens for user '%v': %v", migrateTokensOperation, workunit.originalUser.Name, err)
+			}
+		}
+	}
+}
diff --git a/pkg/agent/clean/adunmigration/users.go b/pkg/agent/clean/adunmigration/users.go
new file mode 100644
index 00000000000..8690a2d239c
--- /dev/null
+++ b/pkg/agent/clean/adunmigration/users.go
@@ -0,0 +1,121 @@
+package adunmigration
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
+	"github.com/rancher/rancher/pkg/types/config"
+)
+
+func describePlannedChanges(workunit migrateUserWorkUnit) {
+	logrus.Infof("[%v] DRY RUN: changes to user '%v' have NOT been saved.", migrateAdUserOperation, workunit.originalUser.Name)
+	if len(workunit.duplicateUsers) > 0 {
+		logrus.Infof("[%v] DRY RUN: duplicate users were identified", migrateAdUserOperation)
+		for _, duplicateUser := range workunit.duplicateUsers {
+			logrus.Infof("[%v] DRY RUN: would DELETE user %v", migrateAdUserOperation, duplicateUser.Name)
+		}
+	}
+}
+
+func deleteDuplicateUsers(workunit migrateUserWorkUnit, sc *config.ScaledContext) error {
+	for _, duplicateUser := range workunit.duplicateUsers {
+		err := sc.Management.Users("").Delete(duplicateUser.Name, &metav1.DeleteOptions{})
+		if err != nil {
+			logrus.Errorf("[%v] failed to delete duplicate user '%v' with: %v", migrateAdUserOperation, duplicateUser.Name, err)
+			// If the duplicate deletion has failed for some reason, it is NOT safe to save the modified user, as
+			// this may result in a duplicate AD principal ID. Notify and skip.
+			logrus.Errorf("[%v] cannot safely save modifications to user %v, skipping", migrateAdUserOperation, workunit.originalUser.Name)
+			return errors.Errorf("failed to delete duplicate users")
+		}
+		logrus.Infof("[%v] deleted duplicate user %v", migrateAdUserOperation, duplicateUser.Name)
+	}
+	return nil
+}
+
+func updateModifiedUser(workunit migrateUserWorkUnit, sc *config.ScaledContext) {
+	if workunit.originalUser.Annotations == nil {
+		workunit.originalUser.Annotations = make(map[string]string)
+	}
+	if workunit.originalUser.Labels == nil {
+		workunit.originalUser.Labels = make(map[string]string)
+	}
+	workunit.originalUser.Annotations[adGUIDMigrationAnnotation] = workunit.guid
+	workunit.originalUser.Labels[adGUIDMigrationLabel] = migratedLabelValue
+	_, err := sc.Management.Users("").Update(workunit.originalUser)
+	if err != nil {
+		logrus.Errorf("[%v] failed to save modified user '%v' with: %v", migrateAdUserOperation, workunit.originalUser.Name, err)
+		return
+	}
+	logrus.Infof("[%v] user %v was successfully migrated", migrateAdUserOperation, workunit.originalUser.Name)
+}
+
+func replaceGUIDPrincipalWithDn(user *v3.User, dn string, guid string, dryRun bool) {
+	// It's weird for a single user to have more than just an AD and a Local principal ID, but it *can* happen
+	// if Rancher has used more than one auth provider over its history. Here we'll keep all principal IDs
+	// that are unrelated to AD
+	var principalIDs []string
+	for _, principalID := range user.PrincipalIDs {
+		if !strings.HasPrefix(principalID, activeDirectoryPrefix) {
+			principalIDs = append(principalIDs, principalID)
+		}
+	}
+	principalIDs = append(principalIDs, activeDirectoryPrefix+dn)
+
+	if dryRun {
+		// In dry run mode we will merely print the computed list and leave the original user object alone
+		logrus.Infof("[%v] DRY RUN: user '%v' with GUID '%v' would have new principals:", migrateAdUserOperation,
+			user.Name, guid)
+		for _, principalID := range principalIDs {
+			logrus.Infof("[%v] DRY RUN: '%v'", migrateAdUserOperation, principalID)
+		}
+	} else {
+		user.PrincipalIDs = principalIDs
+		logrus.Debugf("[%v] user '%v' with GUID %v will have new principals:", migrateAdUserOperation,
+			user.Name, guid)
+		for _, principalID := range user.PrincipalIDs {
+			logrus.Debugf("[%v] '%v'", migrateAdUserOperation, principalID)
+		}
+	}
+}
+
+func isAdUser(user *v3.User) bool {
+	for _, principalID := range user.PrincipalIDs {
+		if strings.HasPrefix(principalID, activeDirectoryPrefix) {
+			return true
+		}
+	}
+	return false
+}
+
+func adPrincipalID(user *v3.User) string {
+	for _, principalID := range user.PrincipalIDs {
+		if strings.HasPrefix(principalID, activeDirectoryPrefix) {
+			return principalID
+		}
+	}
+	return ""
+}
+
+func localPrincipalID(user *v3.User) string {
+	for _, principalID := range user.PrincipalIDs {
+		if strings.HasPrefix(principalID, localPrefix) {
+			return principalID
+		}
+	}
+	return ""
+}
+
+func getExternalID(principalID string) (string, error) {
+	parts := strings.Split(principalID, "://")
+	if len(parts) != 2 {
+		return "", fmt.Errorf("[%v] failed to parse invalid principalID: %v", identifyAdUserOperation, principalID)
+	}
+	return parts[1], nil
+}
+
+func getScope(principalID string) (string, error) {
+	parts := strings.Split(principalID, "://")
+	if len(parts) != 2 {
+		return "", fmt.Errorf("[%v] failed to parse invalid principalID: %v", identifyAdUserOperation, principalID)
+	}
+	return parts[0], nil
+}
diff --git a/pkg/auth/providers/activedirectory/activedirectory_provider.go b/pkg/auth/providers/activedirectory/activedirectory_provider.go
index 27dec1ccd4f..4330bf301d8 100644
--- a/pkg/auth/providers/activedirectory/activedirectory_provider.go
+++ b/pkg/auth/providers/activedirectory/activedirectory_provider.go
@@ -6,9 +6,15 @@ import (
 	"fmt"
 	"strings"
 
+	"github.com/rancher/norman/httperror"
+
 	"github.com/mitchellh/mapstructure"
 	"github.com/pkg/errors"
 	"github.com/rancher/norman/types"
+	"github.com/sirupsen/logrus"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+
 	v32 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
 	"github.com/rancher/rancher/pkg/auth/providers/common"
 	"github.com/rancher/rancher/pkg/auth/tokens"
@@ -18,17 +24,24 @@ import (
 	v3 "github.com/rancher/rancher/pkg/generated/norman/management.cattle.io/v3"
 	"github.com/rancher/rancher/pkg/types/config"
 	"github.com/rancher/rancher/pkg/user"
-	"github.com/sirupsen/logrus"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/runtime"
 )
 
 const (
-	Name              = "activedirectory"
-	UserScope         = Name + "_user"
-	GroupScope        = Name + "_group"
-	ObjectClass       = "objectClass"
-	MemberOfAttribute = "memberOf"
+	Name                               = "activedirectory"
+	UserScope                          = Name + "_user"
+	GroupScope                         = Name + "_group"
+	ObjectClass                        = "objectClass"
+	MemberOfAttribute                  = "memberOf"
+	StatusConfigMapName                = "ad-guid-migration"
+	StatusConfigMapNamespace           = "cattle-system"
+	StatusMigrationField               = "ad-guid-migration-status"
+	StatusMigrationFinished            = "Finished"
+	StatusMigrationRunning             = "Running"
+	StatusMigrationFinishedWithSkipped = "FinishedWithSkipped"
+	StatusMigrationFinishedWithMissing = "FinishedWithMissing"
+	StatusMigrationFailed              = "Failed"
+	StatusLoginDisabled                = "login is disabled while migration is running"
+	StatusACMigrationRunning           = "migration-ad-guid-migration-status"
 )
 
 var scopes = []string{UserScope, GroupScope}
@@ -36,6 +49,7 @@ var scopes = []string{UserScope, GroupScope}
 type adProvider struct {
 	ctx         context.Context
 	authConfigs v3.AuthConfigInterface
+	configMaps  corev1.ConfigMapLister
 	secrets     corev1.SecretInterface
 	userMGR     user.Manager
 	certs       string
@@ -47,6 +61,7 @@ func Configure(ctx context.Context, mgmtCtx *config.ScaledContext, userMGR user.
 	return &adProvider{
 		ctx:         ctx,
 		authConfigs: mgmtCtx.Management.AuthConfigs(""),
+		configMaps:  mgmtCtx.Core.ConfigMaps("").Controller().Lister(),
 		secrets:     mgmtCtx.Core.Secrets(""),
 		userMGR:     userMGR,
 		tokenMGR:    tokenMGR,
@@ -83,6 +98,11 @@ func (p *adProvider) AuthenticateUser(ctx context.Context, input interface{}) (v
 		return v3.Principal{}, nil, "", errors.New("can't find authprovider")
 	}
 
+	// If a migration is running, we need to block logins and indicate why we are doing so
+	if config.Annotations != nil && config.Annotations[StatusACMigrationRunning] == StatusMigrationRunning {
+		return v3.Principal{}, nil, "", httperror.NewAPIError(httperror.ClusterUnavailable, StatusLoginDisabled)
+	}
+
 	principal, groupPrincipal, err := p.loginUser(login, config, caPool, false)
 	if err != nil {
 		return v3.Principal{}, nil, "", err
@@ -239,6 +259,13 @@ func (p *adProvider) GetUserExtraAttributes(userPrincipal v3.Principal) map[stri
 	return extras
 }
 
+type LoginDisabledError struct{}
+
+// Error provides a string representation of a LoginDisabledError
+func (e LoginDisabledError) Error() string {
+	return StatusLoginDisabled
+}
+
 // IsDisabledProvider checks if the Active Directory provider is currently disabled in Rancher.
 func (p *adProvider) IsDisabledProvider() (bool, error) {
 	adConfig, _, err := p.getActiveDirectoryConfig()
diff --git a/pkg/auth/providers/common/provider_util.go b/pkg/auth/providers/common/provider_util.go
new file mode 100644
index 00000000000..b7573d1ae94
--- /dev/null
+++ b/pkg/auth/providers/common/provider_util.go
@@ -0,0 +1,46 @@
+package common
+
+import (
+	"fmt"
+	"reflect"
+	"time"
+
+	"github.com/mitchellh/mapstructure"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// Decode will decode to the output structure by creating a custom decoder
+// that uses the stringToK8sTimeHookFunc to handle the metav1.Time field properly.
+func Decode(input, output any) error {
+	decoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
+		DecodeHook: stringToK8sTimeHookFunc(),
+		Result:     output,
+	})
+	if err != nil {
+		return fmt.Errorf("unable to create decoder for Config: %w", err)
+	}
+	err = decoder.Decode(input)
+	if err != nil {
+		return fmt.Errorf("unable to decode Config: %w", err)
+	}
+	return nil
+}
+
+// stringToK8sTimeHookFunc returns a DecodeHookFunc that converts strings to metav1.Time.
+func stringToK8sTimeHookFunc() mapstructure.DecodeHookFunc {
+	return func(
+		f reflect.Type,
+		t reflect.Type,
+		data interface{}) (interface{}, error) {
+		if f.Kind() != reflect.String {
+			return data, nil
+		}
+		if t != reflect.TypeOf(metav1.Time{}) {
+			return data, nil
+		}
+
+		// Convert it by parsing
+		stdTime, err := time.Parse(time.RFC3339, data.(string))
+		return metav1.Time{Time: stdTime}, err
+	}
+}
diff --git a/pkg/multiclustermanager/app.go b/pkg/multiclustermanager/app.go
index 85add4bbca1..29a97b6f923 100644
--- a/pkg/multiclustermanager/app.go
+++ b/pkg/multiclustermanager/app.go
@@ -7,8 +7,13 @@ import (
 	"sync"
 	"time"
 
+	"github.com/rancher/rancher/pkg/agent/clean/adunmigration"
+
 	"github.com/pkg/errors"
 	"github.com/rancher/norman/types"
+	"github.com/sirupsen/logrus"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
 	"github.com/rancher/rancher/pkg/auth/providerrefresh"
 	"github.com/rancher/rancher/pkg/auth/providers/common"
 	"github.com/rancher/rancher/pkg/auth/tokens"
@@ -28,8 +33,6 @@ import (
 	"github.com/rancher/rancher/pkg/tunnelserver/mcmauthorizer"
 	"github.com/rancher/rancher/pkg/types/config"
 	"github.com/rancher/rancher/pkg/wrangler"
-	"github.com/sirupsen/logrus"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
 type Options struct {
@@ -213,6 +216,7 @@ func (m *mcm) Start(ctx context.Context) error {
 		return errors.Wrap(err, "failed to telemetry")
 	}
 
+	go adunmigration.UnmigrateAdGUIDUsersOnce(m.ScaledContext)
 	tokens.StartPurgeDaemon(ctx, management)
 	providerrefresh.StartRefreshDaemon(ctx, m.ScaledContext, management)
 	managementdata.CleanupOrphanedSystemUsers(ctx, management)
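
Usage note: a minimal sketch of how the manual entry point added above can be driven for a
dry run (hypothetical harness: the kubeconfig path and the clientcmd call are illustrative,
while UnmigrateAdGUIDUsers and its dryRun/deleteMissingUsers parameters are the ones defined
in this patch):

	package main

	import (
		"log"

		"k8s.io/client-go/tools/clientcmd"

		"github.com/rancher/rancher/pkg/agent/clean/adunmigration"
	)

	func main() {
		// Hypothetical: load a rest.Config from a kubeconfig file; an in-cluster caller
		// would use rest.InClusterConfig() instead.
		restConfig, err := clientcmd.BuildConfigFromFlags("", "/root/.kube/config")
		if err != nil {
			log.Fatalf("failed to load kubeconfig: %v", err)
		}

		// dryRun=true logs every planned change and modifies nothing; the implementation
		// also forces deleteMissingUsers to false in this mode.
		if err := adunmigration.UnmigrateAdGUIDUsers(restConfig, true, false); err != nil {
			log.Fatalf("unmigration failed: %v", err)
		}
	}

Running with dryRun=false and deleteMissingUsers=true instead permanently deletes GUID-based
users that can no longer be found in Active Directory, as described in UnmigrateAdGUIDUsers above.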