From 489fe379f7b5c26b516469acb5d165d10af7682a Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Sat, 10 Sep 2022 02:39:30 -0500 Subject: [PATCH 01/15] Rename BaseOperatingSystem... to BaseOSContainer.. We decided baseOSContainerImage was clearer than baseOperatingSystemContainer (and the same for the extensions container) so this renames all of this throughout to match. I'm doing this first before all of the rest of the "new image by default" code so if we have to revert one of those commits, we don't have to worry about the names. This just renames all instances of the BaseOperatingSystem variables througout to BaseOS and favors the more clear "ContainerImage" suffix rather than the less clear "Container", as "Container" typically signifies something that's running. --- cmd/machine-config-operator/bootstrap.go | 70 +++++++++---------- ...machine-config-operator_05_osimageurl.yaml | 4 +- lib/resourcemerge/machineconfig.go | 4 +- manifests/controllerconfig.crd.yaml | 10 +-- .../v1/types.go | 8 +-- pkg/controller/render/render_controller.go | 6 +- pkg/operator/bootstrap.go | 4 +- pkg/operator/images.go | 4 +- pkg/operator/sync.go | 14 ++-- 9 files changed, 62 insertions(+), 62 deletions(-) diff --git a/cmd/machine-config-operator/bootstrap.go b/cmd/machine-config-operator/bootstrap.go index 7cdf60abff..e092ff9d2f 100644 --- a/cmd/machine-config-operator/bootstrap.go +++ b/cmd/machine-config-operator/bootstrap.go @@ -23,31 +23,31 @@ var ( } bootstrapOpts struct { - baremetalRuntimeCfgImage string - cloudConfigFile string - configFile string - cloudProviderCAFile string - corednsImage string - destinationDir string - haproxyImage string - imagesConfigMapFile string - infraConfigFile string - infraImage string - releaseImage string - keepalivedImage string - kubeCAFile string - mcoImage string - oauthProxyImage string - networkConfigFile string - oscontentImage string - pullSecretFile string - rootCAFile string - proxyConfigFile string - additionalTrustBundleFile string - dnsConfigFile string - imageReferences string - baseOperatingSystemContainer string - baseOperatingSystemExtensionsContainer string + baremetalRuntimeCfgImage string + cloudConfigFile string + configFile string + cloudProviderCAFile string + corednsImage string + destinationDir string + haproxyImage string + imagesConfigMapFile string + infraConfigFile string + infraImage string + releaseImage string + keepalivedImage string + kubeCAFile string + mcoImage string + oauthProxyImage string + networkConfigFile string + oscontentImage string + pullSecretFile string + rootCAFile string + proxyConfigFile string + additionalTrustBundleFile string + dnsConfigFile string + imageReferences string + baseOSContainerImage string + baseOSExtensionsContainerImage string } ) @@ -136,11 +136,11 @@ func runBootstrapCmd(cmd *cobra.Command, args []string) { bootstrapOpts.oauthProxyImage = findImageOrDie(imgstream, "oauth-proxy") bootstrapOpts.infraImage = findImageOrDie(imgstream, "pod") bootstrapOpts.haproxyImage = findImageOrDie(imgstream, "haproxy-router") - bootstrapOpts.baseOperatingSystemContainer, err = findImage(imgstream, baseOSContainerImageTag) + bootstrapOpts.baseOSContainerImage, err = findImage(imgstream, baseOSContainerImageTag) if err != nil { glog.Warningf("Base OS container not found: %s", err) } - bootstrapOpts.baseOperatingSystemExtensionsContainer, err = findImage(imgstream, fmt.Sprintf("%s-extensions", baseOSContainerImageTag)) + bootstrapOpts.baseOSExtensionsContainerImage, err = findImage(imgstream, fmt.Sprintf("%s-extensions", baseOSContainerImageTag)) if err != nil { glog.Warningf("Base OS extensions container not found: %s", err) } @@ -148,14 +148,14 @@ func runBootstrapCmd(cmd *cobra.Command, args []string) { imgs := operator.Images{ RenderConfigImages: operator.RenderConfigImages{ - MachineConfigOperator: bootstrapOpts.mcoImage, - MachineOSContent: bootstrapOpts.oscontentImage, - KeepalivedBootstrap: bootstrapOpts.keepalivedImage, - CorednsBootstrap: bootstrapOpts.corednsImage, - BaremetalRuntimeCfgBootstrap: bootstrapOpts.baremetalRuntimeCfgImage, - OauthProxy: bootstrapOpts.oauthProxyImage, - BaseOperatingSystemContainer: bootstrapOpts.baseOperatingSystemContainer, - BaseOperatingSystemExtensionsContainer: bootstrapOpts.baseOperatingSystemExtensionsContainer, + MachineConfigOperator: bootstrapOpts.mcoImage, + MachineOSContent: bootstrapOpts.oscontentImage, + KeepalivedBootstrap: bootstrapOpts.keepalivedImage, + CorednsBootstrap: bootstrapOpts.corednsImage, + BaremetalRuntimeCfgBootstrap: bootstrapOpts.baremetalRuntimeCfgImage, + OauthProxy: bootstrapOpts.oauthProxyImage, + BaseOSContainerImage: bootstrapOpts.baseOSContainerImage, + BaseOSExtensionsContainerImage: bootstrapOpts.baseOSExtensionsContainerImage, }, ControllerConfigImages: operator.ControllerConfigImages{ InfraImage: bootstrapOpts.infraImage, diff --git a/install/0000_80_machine-config-operator_05_osimageurl.yaml b/install/0000_80_machine-config-operator_05_osimageurl.yaml index 122359a5be..f654ec49dd 100644 --- a/install/0000_80_machine-config-operator_05_osimageurl.yaml +++ b/install/0000_80_machine-config-operator_05_osimageurl.yaml @@ -11,8 +11,8 @@ data: releaseVersion: 0.0.1-snapshot # This (will eventually) replace the below when https://github.com/openshift/enhancements/pull/1032 # progresses towards the default. - baseOperatingSystemContainer: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8" - #baseOperatingSystemExtensionsContainer: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8-extensions" + baseOSContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8" + #baseOSExtensionsContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8-extensions" # The OS payload used for 4.10 and below; more information in # https://github.com/openshift/machine-config-operator/blob/master/docs/OSUpgrades.md # (The original issue was https://github.com/openshift/machine-config-operator/issues/183 ) diff --git a/lib/resourcemerge/machineconfig.go b/lib/resourcemerge/machineconfig.go index c3aed344a1..dc58018594 100644 --- a/lib/resourcemerge/machineconfig.go +++ b/lib/resourcemerge/machineconfig.go @@ -72,8 +72,8 @@ func ensureControllerConfigSpec(modified *bool, existing *mcfgv1.ControllerConfi resourcemerge.SetStringIfSet(modified, &existing.Platform, required.Platform) resourcemerge.SetStringIfSet(modified, &existing.EtcdDiscoveryDomain, required.EtcdDiscoveryDomain) resourcemerge.SetStringIfSet(modified, &existing.OSImageURL, required.OSImageURL) - resourcemerge.SetStringIfSet(modified, &existing.BaseOperatingSystemContainer, required.BaseOperatingSystemContainer) - resourcemerge.SetStringIfSet(modified, &existing.BaseOperatingSystemExtensionsContainer, required.BaseOperatingSystemExtensionsContainer) + resourcemerge.SetStringIfSet(modified, &existing.BaseOSContainerImage, required.BaseOSContainerImage) + resourcemerge.SetStringIfSet(modified, &existing.BaseOSExtensionsContainerImage, required.BaseOSExtensionsContainerImage) resourcemerge.SetStringIfSet(modified, &existing.NetworkType, required.NetworkType) setBytesIfSet(modified, &existing.AdditionalTrustBundle, required.AdditionalTrustBundle) diff --git a/manifests/controllerconfig.crd.yaml b/manifests/controllerconfig.crd.yaml index eeda4d94c8..56024d0c7f 100644 --- a/manifests/controllerconfig.crd.yaml +++ b/manifests/controllerconfig.crd.yaml @@ -968,12 +968,12 @@ spec: contains the OS update payload. Its value is taken from the data.osImageURL field on the machine-config-osimageurl ConfigMap. type: string - baseOperatingSystemContainer: - description: baseOperatingSystemContainer is the new format operating system update image. + baseOSContainerImage: + description: baseOSContainerImage is the new format operating system update image. See https://github.com/openshift/enhancements/pull/1032 type: string - baseOperatingSystemExtensionsContainer: - description: baseOperatingSystemExtensionsContainer is the extensions container matching new format operating system update image. + baseOSExtensionsContainerImage: + description: baseOSExtensionsContainerImage is the extensions container matching new format operating system update image. See https://github.com/openshift/enhancements/pull/1032 type: string platform: @@ -1050,7 +1050,7 @@ spec: - ipFamilies - kubeAPIServerServingCAData - osImageURL - - baseOperatingSystemContainer + - baseOSContainerImage - proxy - releaseImage - rootCAData diff --git a/pkg/apis/machineconfiguration.openshift.io/v1/types.go b/pkg/apis/machineconfiguration.openshift.io/v1/types.go index cdc12b4eec..242bf6a67c 100644 --- a/pkg/apis/machineconfiguration.openshift.io/v1/types.go +++ b/pkg/apis/machineconfiguration.openshift.io/v1/types.go @@ -72,11 +72,11 @@ type ControllerConfigSpec struct { // images is map of images that are used by the controller to render templates under ./templates/ Images map[string]string `json:"images"` - // BaseOperatingSystemContainer is the new-format container image for operating system updates. - BaseOperatingSystemContainer string `json:"baseOperatingSystemContainer"` + // BaseOSContainerImage is the new-format container image for operating system updates. + BaseOSContainerImage string `json:"baseOSContainerImage"` - // BaseOperatingSystemExtensionsContainer is the matching extensions container for the new-format container - BaseOperatingSystemExtensionsContainer string `json:"baseOperatingSystemExtensionsContainer"` + // BaseOSExtensionsContainerImage is the matching extensions container for the new-format container + BaseOSExtensionsContainerImage string `json:"baseOSExtensionsContainerImage"` // OSImageURL is the old-format container image that contains the OS update payload. OSImageURL string `json:"osImageURL"` diff --git a/pkg/controller/render/render_controller.go b/pkg/controller/render/render_controller.go index 49fb33213f..86146d6f3c 100644 --- a/pkg/controller/render/render_controller.go +++ b/pkg/controller/render/render_controller.go @@ -554,10 +554,10 @@ func generateRenderedMachineConfig(pool *mcfgv1.MachineConfigPool, configs []*mc return nil, fmt.Errorf("Ignoring controller config generated without %s annotation (my version: %s)", daemonconsts.GeneratedByVersionAnnotationKey, version.Raw) } - if cconfig.Spec.BaseOperatingSystemContainer == "" { - glog.Warningf("No BaseOperatingSystemContainer set") + if cconfig.Spec.BaseOSContainerImage == "" { + glog.Warningf("No BaseOSContainerImage set") } else { - glog.Infof("BaseOperatingSystemContainer=%s", cconfig.Spec.BaseOperatingSystemContainer) + glog.Infof("BaseOSContainerImage=%s", cconfig.Spec.BaseOSContainerImage) } // Before merging all MCs for a specific pool, let's make sure MachineConfigs are valid diff --git a/pkg/operator/bootstrap.go b/pkg/operator/bootstrap.go index b146b0ea1c..e167ed7716 100644 --- a/pkg/operator/bootstrap.go +++ b/pkg/operator/bootstrap.go @@ -139,8 +139,8 @@ func RenderBootstrap( spec.RootCAData = bundle spec.PullSecret = nil spec.OSImageURL = imgs.MachineOSContent - spec.BaseOperatingSystemContainer = imgs.BaseOperatingSystemContainer - spec.BaseOperatingSystemExtensionsContainer = imgs.BaseOperatingSystemExtensionsContainer + spec.BaseOSContainerImage = imgs.BaseOSContainerImage + spec.BaseOSExtensionsContainerImage = imgs.BaseOSExtensionsContainerImage spec.ReleaseImage = releaseImage spec.Images = map[string]string{ templatectrl.MachineConfigOperatorKey: imgs.MachineConfigOperator, diff --git a/pkg/operator/images.go b/pkg/operator/images.go index 33a997705b..1d7a4d48e6 100644 --- a/pkg/operator/images.go +++ b/pkg/operator/images.go @@ -18,9 +18,9 @@ type RenderConfigImages struct { MachineConfigOperator string `json:"machineConfigOperator"` MachineOSContent string `json:"machineOSContent"` // The new format image - BaseOperatingSystemContainer string `json:"baseOperatingSystemContainer"` + BaseOSContainerImage string `json:"baseOSContainerImage"` // The matching extensions container for the new format image - BaseOperatingSystemExtensionsContainer string `json:"baseOperatingSystemExtensionsContainer"` + BaseOSExtensionsContainerImage string `json:"baseOSExtensionsContainerImage"` // These have to be named differently from the ones in ControllerConfigImages // or we get errors about ambiguous selectors because both structs are // combined in the Images struct. diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 6a7ff40a49..3a711f03fb 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -260,8 +260,8 @@ func (optr *Operator) syncRenderConfig(_ *renderConfig) error { return err } imgs.MachineOSContent = osimageurl - imgs.BaseOperatingSystemContainer = oscontainer - imgs.BaseOperatingSystemExtensionsContainer = osextensionscontainer + imgs.BaseOSContainerImage = oscontainer + imgs.BaseOSExtensionsContainerImage = osextensionscontainer // sync up the ControllerConfigSpec infra, network, proxy, dns, err := optr.getGlobalConfig() @@ -310,8 +310,8 @@ func (optr *Operator) syncRenderConfig(_ *renderConfig) error { spec.RootCAData = bundle spec.PullSecret = &corev1.ObjectReference{Namespace: "openshift-config", Name: "pull-secret"} spec.OSImageURL = imgs.MachineOSContent - spec.BaseOperatingSystemContainer = imgs.BaseOperatingSystemContainer - spec.BaseOperatingSystemExtensionsContainer = imgs.BaseOperatingSystemExtensionsContainer + spec.BaseOSContainerImage = imgs.BaseOSContainerImage + spec.BaseOSExtensionsContainerImage = imgs.BaseOSExtensionsContainerImage spec.Images = map[string]string{ templatectrl.MachineConfigOperatorKey: imgs.MachineConfigOperator, @@ -875,9 +875,9 @@ func (optr *Operator) getOsImageURLs(namespace string) (string, string, string, return "", "", "", fmt.Errorf("refusing to read osImageURL version %q, operator version %q", releaseVersion, optrVersion) } - newextensions, hasNewExtensions := cm.Data["baseOperatingSystemExtensionsContainer"] + newextensions, hasNewExtensions := cm.Data["baseOSExtensionsContainerImage"] - newformat, hasNewFormat := cm.Data["baseOperatingSystemContainer"] + newformat, hasNewFormat := cm.Data["baseOSContainerImage"] oldformat, hasOldFormat := cm.Data["osImageURL"] @@ -888,7 +888,7 @@ func (optr *Operator) getOsImageURLs(namespace string) (string, string, string, // If we don't have a new format image, and we can't fall back to the old one if !hasOldFormat && !hasNewFormat { - return "", "", "", fmt.Errorf("Missing baseOperatingSystemContainer and osImageURL from configmap") + return "", "", "", fmt.Errorf("Missing baseOSContainerImage and osImageURL from configmap") } return newformat, newextensions, oldformat, nil From 16ac486a3f5ad9294a92a0e6f22d4d83211763cc Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Mon, 29 Aug 2022 11:04:17 -0500 Subject: [PATCH 02/15] Kernel/Exetensions support for new image format Now that we have the extensions container, we can apply extensions and switch kernels again. This just re-enables that functionality for the 'new image' path during daemon updates, and removes the "not supported" messages. --- pkg/daemon/update.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 476178274f..4138c65484 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -2116,15 +2116,14 @@ func (dn *CoreOSDaemon) applyLayeredOSChanges(mcDiff machineConfigDiff, oldConfi } } - // TODO(jkyros): We can't currently switch kernels on layered images, so only allow it if they're both default. We'll come back for this when it's supported. - // If you did try to switch kernels when using layered image, you would get a "No enabled repositories" error. - if !(canonicalizeKernelType(oldConfig.Spec.KernelType) == ctrlcommon.KernelTypeDefault && canonicalizeKernelType(newConfig.Spec.KernelType) == ctrlcommon.KernelTypeDefault) { - return fmt.Errorf("Non-default kernels are not currently supported for layered images. (old: %s new %s)", oldConfig.Spec.KernelType, newConfig.Spec.KernelType) + // Switch to real time kernel + if err := dn.switchKernel(oldConfig, newConfig); err != nil { + return err } - // TODO(jkyros): This is where we will handle Joseph's extensions container - if len(newConfig.Spec.Extensions) > 0 { - return fmt.Errorf("Extensions are not currently supported with layered images, but extensions were supplied: %s", strings.Join(newConfig.Spec.Extensions, " ")) + // Apply extensions + if err := dn.applyExtensions(oldConfig, newConfig); err != nil { + return err } return nil From 4802b34c51f2140fde2fe61c3ae7f939d8968aae Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Tue, 30 Aug 2022 01:56:00 -0500 Subject: [PATCH 03/15] Initialize NodeUpdaterClient with kubelet secrets Both rpm-ostree rebase and update require access to the pull secrets in order to pull manifests and images. This changes the behavior such that we symlink the kubelet config.json secrets to /run/ostree/auth.json when we initialize our "NodeUpdaterClient" (rather than only in rebase) so any of our rpm-ostree calls can use it. --- pkg/daemon/rpm-ostree.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/daemon/rpm-ostree.go b/pkg/daemon/rpm-ostree.go index e64cf96dda..346de6c6e1 100644 --- a/pkg/daemon/rpm-ostree.go +++ b/pkg/daemon/rpm-ostree.go @@ -135,6 +135,13 @@ func (r *RpmOstreeClient) Initialize() error { return err } + // Commands like update and rebase need the pull secrets to pull images and manifests, + // make sure we get access to them when we Initialize + err := useKubeletConfigSecrets() + if err != nil { + return fmt.Errorf("Error while ensuring access to kublet config.json pull secrets: %w", err) + } + return nil } @@ -342,13 +349,6 @@ func (r *RpmOstreeClient) IsBootableImage(imgURL string) (bool, error) { // RebaseLayered rebases system or errors if already rebased func (r *RpmOstreeClient) RebaseLayered(imgURL string) (err error) { glog.Infof("Executing rebase to %s", imgURL) - - // For now, just let ostree use the kublet config.json, - err = useKubeletConfigSecrets() - if err != nil { - return fmt.Errorf("Error while ensuring access to kublet config.json pull secrets: %w", err) - } - return runRpmOstree("rebase", "--experimental", "ostree-unverified-registry:"+imgURL) } From 2106a4456a17cb74ac099e92730f125ba3fc5ea9 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Mon, 29 Aug 2022 11:55:09 -0500 Subject: [PATCH 04/15] Reenable extensions container in osimageurl map --- .../0000_80_machine-config-operator_05_osimageurl.yaml | 2 +- install/image-references | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/install/0000_80_machine-config-operator_05_osimageurl.yaml b/install/0000_80_machine-config-operator_05_osimageurl.yaml index f654ec49dd..bf49ba613b 100644 --- a/install/0000_80_machine-config-operator_05_osimageurl.yaml +++ b/install/0000_80_machine-config-operator_05_osimageurl.yaml @@ -12,7 +12,7 @@ data: # This (will eventually) replace the below when https://github.com/openshift/enhancements/pull/1032 # progresses towards the default. baseOSContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8" - #baseOSExtensionsContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8-extensions" + baseOSExtensionsContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8-extensions" # The OS payload used for 4.10 and below; more information in # https://github.com/openshift/machine-config-operator/blob/master/docs/OSUpgrades.md # (The original issue was https://github.com/openshift/machine-config-operator/issues/183 ) diff --git a/install/image-references b/install/image-references index 6211bbbeb3..4170492251 100644 --- a/install/image-references +++ b/install/image-references @@ -27,11 +27,10 @@ spec: from: kind: DockerImage name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8 - # Uncomment this after https://issues.redhat.com/browse/COS-1646 lands - # - name: rhel-coreos-8-extensions - # from: - # kind: DockerImage - # name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8-extensions + - name: rhel-coreos-8-extensions + from: + kind: DockerImage + name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8-extensions - name: keepalived-ipfailover from: kind: DockerImage From ed7c1195882ffa20743e84d2dc0d9b2e1b72d5b9 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Tue, 6 Sep 2022 22:14:56 -0500 Subject: [PATCH 05/15] Plumbing for extensions container in machineconfig Until we figure out if/how we want to use the extensions container as a service, we're going to have to extract it to disk to use it. In order for that to happen, the daemon needs to know where to get it, so it needs to be present in machineconfig. This adds the extensions container to the machineconfig type, the crd, and the resourcemerge library. --- ...0000_80_machine-config-operator_01_machineconfig.crd.yaml | 4 ++++ lib/resourcemerge/machineconfig.go | 1 + pkg/apis/machineconfiguration.openshift.io/v1/types.go | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/install/0000_80_machine-config-operator_01_machineconfig.crd.yaml b/install/0000_80_machine-config-operator_01_machineconfig.crd.yaml index 10608f76e1..24736b67de 100644 --- a/install/0000_80_machine-config-operator_01_machineconfig.crd.yaml +++ b/install/0000_80_machine-config-operator_01_machineconfig.crd.yaml @@ -66,6 +66,10 @@ spec: description: MachineConfigSpec is the spec for MachineConfig type: object properties: + baseOSExtensionsContainerImage: + description: baseOperatingSystemExtensionContainer specifies the remote location that will be used + to fetch the extensions container matching a new-format OS image + type: string config: description: Config is a Ignition Config object. type: object diff --git a/lib/resourcemerge/machineconfig.go b/lib/resourcemerge/machineconfig.go index dc58018594..f4185d21fc 100644 --- a/lib/resourcemerge/machineconfig.go +++ b/lib/resourcemerge/machineconfig.go @@ -47,6 +47,7 @@ func EnsureMachineConfigPool(modified *bool, existing *mcfgv1.MachineConfigPool, func ensureMachineConfigSpec(modified *bool, existing *mcfgv1.MachineConfigSpec, required mcfgv1.MachineConfigSpec) { resourcemerge.SetStringIfSet(modified, &existing.OSImageURL, required.OSImageURL) resourcemerge.SetStringIfSet(modified, &existing.KernelType, required.KernelType) + resourcemerge.SetStringIfSet(modified, &existing.BaseOSExtensionsContainerImage, required.BaseOSExtensionsContainerImage) if !equality.Semantic.DeepEqual(existing.KernelArguments, required.KernelArguments) { *modified = true diff --git a/pkg/apis/machineconfiguration.openshift.io/v1/types.go b/pkg/apis/machineconfiguration.openshift.io/v1/types.go index 242bf6a67c..52851ec5b9 100644 --- a/pkg/apis/machineconfiguration.openshift.io/v1/types.go +++ b/pkg/apis/machineconfiguration.openshift.io/v1/types.go @@ -200,6 +200,11 @@ type MachineConfigSpec struct { // OSImageURL specifies the remote location that will be used to // fetch the OS. OSImageURL string `json:"osImageURL"` + + // BaseOperatingSystemExtensionContainer specifies the remote location that will be used + // to fetch the extensions container matching a new-format OS image + BaseOSExtensionsContainerImage string `json:"baseOSExtensionsContainerImage"` + // Config is a Ignition Config object. Config runtime.RawExtension `json:"config"` From 588386fa0dc036d8a15bbd5685b698a4086b3a50 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Tue, 6 Sep 2022 22:51:07 -0500 Subject: [PATCH 06/15] Pass extensions container through to machineconfig This passes the extensions container from controller config through to machineconfig. --- ...ne-config-operator_01_machineconfig.crd.yaml | 2 +- .../v1/types.go | 2 +- pkg/controller/common/helpers.go | 17 ++++++++--------- pkg/controller/common/helpers_test.go | 9 +++++---- pkg/controller/render/render_controller.go | 3 ++- 5 files changed, 17 insertions(+), 16 deletions(-) diff --git a/install/0000_80_machine-config-operator_01_machineconfig.crd.yaml b/install/0000_80_machine-config-operator_01_machineconfig.crd.yaml index 24736b67de..9161b821b8 100644 --- a/install/0000_80_machine-config-operator_01_machineconfig.crd.yaml +++ b/install/0000_80_machine-config-operator_01_machineconfig.crd.yaml @@ -67,7 +67,7 @@ spec: type: object properties: baseOSExtensionsContainerImage: - description: baseOperatingSystemExtensionContainer specifies the remote location that will be used + description: baseOSExtensionsContainerImage specifies the remote location that will be used to fetch the extensions container matching a new-format OS image type: string config: diff --git a/pkg/apis/machineconfiguration.openshift.io/v1/types.go b/pkg/apis/machineconfiguration.openshift.io/v1/types.go index 52851ec5b9..5d64a9de3f 100644 --- a/pkg/apis/machineconfiguration.openshift.io/v1/types.go +++ b/pkg/apis/machineconfiguration.openshift.io/v1/types.go @@ -201,7 +201,7 @@ type MachineConfigSpec struct { // fetch the OS. OSImageURL string `json:"osImageURL"` - // BaseOperatingSystemExtensionContainer specifies the remote location that will be used + // BaseOSExtensionsContainerImage specifies the remote location that will be used // to fetch the extensions container matching a new-format OS image BaseOSExtensionsContainerImage string `json:"baseOSExtensionsContainerImage"` diff --git a/pkg/controller/common/helpers.go b/pkg/controller/common/helpers.go index c2b1428b27..1edd3cdc5f 100644 --- a/pkg/controller/common/helpers.go +++ b/pkg/controller/common/helpers.go @@ -58,7 +58,7 @@ func strToPtr(s string) *string { // It uses the Ignition config from first object as base and appends all the rest. // Kernel arguments are concatenated. // It defaults to the OSImageURL provided by the CVO but allows a MC provided OSImageURL to take precedence. -func MergeMachineConfigs(configs []*mcfgv1.MachineConfig, osImageURL string) (*mcfgv1.MachineConfig, error) { +func MergeMachineConfigs(configs []*mcfgv1.MachineConfig, cconfig *mcfgv1.ControllerConfig) (*mcfgv1.MachineConfig, error) { if len(configs) == 0 { return nil, nil } @@ -129,21 +129,20 @@ func MergeMachineConfigs(configs []*mcfgv1.MachineConfig, osImageURL string) (*m } // For layering, we want to let the user override OSImageURL again - overriddenOSImageURL := "" + // The template configs always match what's in controllerconfig because they get rendered from there, + // so the only way we get an override here is if the user adds something different + osImageURL := cconfig.Spec.OSImageURL for _, cfg := range configs { if cfg.Spec.OSImageURL != "" { - overriddenOSImageURL = cfg.Spec.OSImageURL + osImageURL = cfg.Spec.OSImageURL } } - // Make sure it's obvious in the logs that it was overridden - if overriddenOSImageURL != "" && overriddenOSImageURL != osImageURL { - osImageURL = overriddenOSImageURL - } return &mcfgv1.MachineConfig{ Spec: mcfgv1.MachineConfigSpec{ - OSImageURL: osImageURL, - KernelArguments: kargs, + OSImageURL: osImageURL, + BaseOSExtensionsContainerImage: cconfig.Spec.BaseOSExtensionsContainerImage, + KernelArguments: kargs, Config: runtime.RawExtension{ Raw: rawOutIgn, }, diff --git a/pkg/controller/common/helpers_test.go b/pkg/controller/common/helpers_test.go index 156a926fd6..2ad2e84b49 100644 --- a/pkg/controller/common/helpers_test.go +++ b/pkg/controller/common/helpers_test.go @@ -234,7 +234,8 @@ func TestParseAndConvert(t *testing.T) { func TestMergeMachineConfigs(t *testing.T) { // variable setup - osImageURL := "testURL" + cconfig := &mcfgv1.ControllerConfig{} + cconfig.Spec.OSImageURL = "testURL" fips := true kargs := []string{"testKarg"} extensions := []string{"testExtensions"} @@ -246,7 +247,7 @@ func TestMergeMachineConfigs(t *testing.T) { }, } inMachineConfigs := []*mcfgv1.MachineConfig{machineConfigFIPS} - mergedMachineConfig, err := MergeMachineConfigs(inMachineConfigs, osImageURL) + mergedMachineConfig, err := MergeMachineConfigs(inMachineConfigs, cconfig) require.Nil(t, err) // check that the outgoing config does have the version string set, @@ -260,7 +261,7 @@ func TestMergeMachineConfigs(t *testing.T) { require.Nil(t, err) expectedMachineConfig := &mcfgv1.MachineConfig{ Spec: mcfgv1.MachineConfigSpec{ - OSImageURL: osImageURL, + OSImageURL: cconfig.Spec.OSImageURL, KernelArguments: []string{}, Config: runtime.RawExtension{ Raw: rawOutIgn, @@ -325,7 +326,7 @@ func TestMergeMachineConfigs(t *testing.T) { machineConfigIgn, machineConfigFIPS, } - mergedMachineConfig, err = MergeMachineConfigs(inMachineConfigs, osImageURL) + mergedMachineConfig, err = MergeMachineConfigs(inMachineConfigs, cconfig) require.Nil(t, err) expectedMachineConfig = &mcfgv1.MachineConfig{ diff --git a/pkg/controller/render/render_controller.go b/pkg/controller/render/render_controller.go index 86146d6f3c..dfac75d5c3 100644 --- a/pkg/controller/render/render_controller.go +++ b/pkg/controller/render/render_controller.go @@ -567,7 +567,8 @@ func generateRenderedMachineConfig(pool *mcfgv1.MachineConfigPool, configs []*mc } } - merged, err := ctrlcommon.MergeMachineConfigs(configs, cconfig.Spec.OSImageURL) + merged, err := ctrlcommon.MergeMachineConfigs(configs, cconfig) + if err != nil { return nil, err } From b35936e8ebc740fffc0c2997ffdd074f00d73a4d Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Thu, 1 Sep 2022 18:45:48 -0500 Subject: [PATCH 07/15] Extract the os extensions container to disk Since we can't use the extensions container as a service right now due to boostrap/dns/complexity, for now, we're going to extract them to disk like we did the extensions in machine-os-contanet. This pulls and extracts the extensions container to /run and adds it as a yum repo if we are on a new format image and there are extensions present in MachineConfig. --- pkg/daemon/update.go | 71 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 4138c65484..f8fe0aae3d 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -42,9 +42,10 @@ const ( // SSH Keys for user "core" will only be written at /home/core/.ssh coreUserSSHPath = "/home/core/.ssh/" // fipsFile is the file to check if FIPS is enabled - fipsFile = "/proc/sys/crypto/fips_enabled" - extensionsRepo = "/etc/yum.repos.d/coreos-extensions.repo" - osImageContentBaseDir = "/run/mco-machine-os-content/" + fipsFile = "/proc/sys/crypto/fips_enabled" + extensionsRepo = "/etc/yum.repos.d/coreos-extensions.repo" + osImageContentBaseDir = "/run/mco-machine-os-content/" + osExtensionsContentBaseDir = "/run/mco-extensions/" // These are the actions for a node to take after applying config changes. (e.g. a new machineconfig is applied) // "None" means no special action needs to be taken @@ -238,6 +239,18 @@ func addExtensionsRepo(osImageContentDir string) error { return nil } +// addLayeredExtensionsRepo adds a repo into /etc/yum.repos.d/ which we use later to +// install extensions and rt-kernel. This is separate from addExtensionsRepo because when we're +// extracting only the extensions container (because with the new format images they are packaged separately), +// we extract to a different location +func addLayeredExtensionsRepo(extensionsImageContentDir string) error { + repoContent := "[coreos-extensions]\nenabled=1\nmetadata_expire=1m\nbaseurl=" + extensionsImageContentDir + "/usr/share/rpm-ostree/extensions/\ngpgcheck=0\nskip_if_unavailable=False\n" + if err := writeFileAtomicallyWithDefaults(extensionsRepo, []byte(repoContent)); err != nil { + return err + } + return nil +} + // podmanRemove kills and removes a container func podmanRemove(cid string) { // Ignore errors here @@ -325,6 +338,38 @@ func ExtractOSImage(imgURL string) (osImageContentDir string, err error) { return } +// ExtractExtensionsImage extracts the OS extensions content in a temporary directory under /run/machine-os-extensions +// and returns the path on successful extraction +func ExtractExtensionsImage(imgURL string) (osExtensionsImageContentDir string, err error) { + var registryConfig []string + if _, err := os.Stat(kubeletAuthFile); err == nil { + registryConfig = append(registryConfig, "--registry-config", kubeletAuthFile) + } + if err = os.MkdirAll(osExtensionsContentBaseDir, 0o755); err != nil { + err = fmt.Errorf("error creating directory %s: %w", osExtensionsContentBaseDir, err) + return + } + + if osExtensionsImageContentDir, err = ioutil.TempDir(osExtensionsContentBaseDir, "os-extensions-content-"); err != nil { + return + } + + // Extract the image + args := []string{"image", "extract", "--path", "/:" + osExtensionsImageContentDir} + args = append(args, registryConfig...) + args = append(args, imgURL) + if _, err = pivotutils.RunExtBackground(cmdRetriesCount, "oc", args...); err != nil { + // Workaround fixes for the environment where oc image extract fails. + // See https://bugzilla.redhat.com/show_bug.cgi?id=1862979 + glog.Infof("Falling back to using podman cp to fetch OS image content") + if err = podmanCopy(imgURL, osExtensionsImageContentDir); err != nil { + return + } + } + + return +} + // Remove pending deployment on OSTree based system func removePendingDeployment() error { return runRpmOstree("cleanup", "-p") @@ -2074,6 +2119,26 @@ func (dn *Daemon) reboot(rationale string) error { } func (dn *CoreOSDaemon) applyLayeredOSChanges(mcDiff machineConfigDiff, oldConfig, newConfig *mcfgv1.MachineConfig) (retErr error) { + + var osExtensionsContentDir string + var err error + if mcDiff.extensions || mcDiff.kernelType { + + // TODO(jkyros): the original intent was that we use the extensions container as a service, but that currently results + // in a lot of complexity due to boostrap and firstboot where the service isn't easily available, so for now we are going + // to extract them to disk like we did previously. + if osExtensionsContentDir, err = ExtractExtensionsImage(newConfig.Spec.BaseOSExtensionsContainerImage); err != nil { + return err + } + // Delete extracted OS image once we are done. + defer os.RemoveAll(osExtensionsContentDir) + + if err := addLayeredExtensionsRepo(osExtensionsContentDir); err != nil { + return err + } + defer os.Remove(extensionsRepo) + } + // Update OS if mcDiff.osUpdate { if err := dn.updateLayeredOS(newConfig); err != nil { From 6317747450e67d52573f15c231bc78c761a7a440 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Fri, 9 Sep 2022 02:13:48 -0500 Subject: [PATCH 08/15] Allow operator to skip the OSImageURL check Currently if someone overrides OSImageURL on their master pool, the required pools sync in the operator will fail because it checks to make sure the OSImageURL matches, and when it's overridden it doesn't. (This results in degradation and timeouts). The check was originally put in to fix a bug around proper update completion reporting. This allows the check to be skipped if the user has "taken the wheel" by overriding OSImageURL. --- pkg/controller/common/constants.go | 3 ++ pkg/operator/status.go | 10 ++++++- pkg/operator/status_test.go | 44 +++++++++++++++++++++++------- 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/pkg/controller/common/constants.go b/pkg/controller/common/constants.go index aa1fa75961..12177c3618 100644 --- a/pkg/controller/common/constants.go +++ b/pkg/controller/common/constants.go @@ -10,6 +10,9 @@ const ( // ReleaseImageVersionAnnotationKey is used to tag the rendered machineconfigs & controller config with the release image version. ReleaseImageVersionAnnotationKey = "machineconfiguration.openshift.io/release-image-version" + // OSImageURLOverriddenKey is used to tag a rendered machineconfig when OSImageURL has been overridden from default using machineconfig + OSImageURLOverriddenKey = "machineconfiguration.openshift.io/os-image-url-overridden" + // ControllerConfigName is the name of the ControllerConfig object that controllers use ControllerConfigName = "machine-config-controller" diff --git a/pkg/operator/status.go b/pkg/operator/status.go index a3f9d81e86..d37e81ec56 100644 --- a/pkg/operator/status.go +++ b/pkg/operator/status.go @@ -612,8 +612,16 @@ func isMachineConfigPoolConfigurationValid(pool *mcfgv1.MachineConfigPool, versi if err != nil { return err } + + // TODO(jkyros): For "Phase 0" layering, we're going to allow this check to pass once the user has "taken the wheel" by overriding OSImageURL. + // We will find a way to make this more visible to the user somewhere since the MCO is kind of "lying" about completing the + // upgrade to the new os version otherwise. if renderedMC.Spec.OSImageURL != osURL { - return fmt.Errorf("osImageURL mismatch for %s in %s expected: %s got: %s", pool.GetName(), renderedMC.Name, osURL, renderedMC.Spec.OSImageURL) + // If we didn't override OSImageURL, this is still bad, because it means that we aren't on the proper OS image yet + _, ok := renderedMC.Annotations[ctrlcommon.OSImageURLOverriddenKey] + if !ok { + return fmt.Errorf("osImageURL mismatch for %s in %s expected: %s got: %s", pool.GetName(), renderedMC.Name, osURL, renderedMC.Spec.OSImageURL) + } } // check that the rendered config matches the OCP release version for cases where there is no OSImageURL change nor new MCO commit diff --git a/pkg/operator/status_test.go b/pkg/operator/status_test.go index e9a00fce12..c9a6d30563 100644 --- a/pkg/operator/status_test.go +++ b/pkg/operator/status_test.go @@ -30,9 +30,10 @@ import ( func TestIsMachineConfigPoolConfigurationValid(t *testing.T) { configNotFound := errors.New("Config Not Found") type config struct { - name string - version string - releaseVersion string + name string + version string + releaseVersion string + osimageurlOverridden bool } tests := []struct { @@ -119,22 +120,42 @@ func TestIsMachineConfigPoolConfigurationValid(t *testing.T) { generated: "g", err: errors.New("osImageURL mismatch for dummy-pool in g expected: myurl got: wrongurl"), }, { + // This is specifically testing that we can make sure the operator status check + // allows a mismatched URL if it's a user override for Phase 0 layering knownConfigs: []config{{ - name: "g", - version: "v2", - releaseVersion: "rv1", + name: "g", + version: "v2", + releaseVersion: "rv2", + osimageurlOverridden: true, }, { name: "c-0", version: "v2", - releaseVersion: "rv1", + releaseVersion: "rv2", }, { name: "u-0", }}, source: []string{"c-0", "u-0"}, - testurl: "myurl", + testurl: "overriddenurl", generated: "g", - err: errors.New("release image version mismatch for dummy-pool in g expected: rv2 got: rv1"), - }} + err: nil, + }, + { + knownConfigs: []config{{ + name: "g", + version: "v2", + releaseVersion: "rv1", + }, { + name: "c-0", + version: "v2", + releaseVersion: "rv1", + }, { + name: "u-0", + }}, + source: []string{"c-0", "u-0"}, + testurl: "myurl", + generated: "g", + err: errors.New("release image version mismatch for dummy-pool in g expected: rv2 got: rv1"), + }} for idx, test := range tests { t.Run(fmt.Sprintf("case #%d", idx), func(t *testing.T) { @@ -148,6 +169,9 @@ func TestIsMachineConfigPoolConfigurationValid(t *testing.T) { if c.releaseVersion != "" { annos[ctrlcommon.ReleaseImageVersionAnnotationKey] = c.releaseVersion } + if c.osimageurlOverridden { + annos[ctrlcommon.OSImageURLOverriddenKey] = "true" + } return &mcfgv1.MachineConfig{ ObjectMeta: metav1.ObjectMeta{ Name: c.name, From da2228fde59559086009437d403fc633ff4d4a77 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Sat, 10 Sep 2022 01:26:41 -0500 Subject: [PATCH 09/15] Gate "new image format by default" on boolean var This adds an "image selection function" to the controller helpers and a boolean variable in helpers.go that determines which image it will use by default. The intent is that it is that it be set to false by default here, rendering the "new image format by default" functionality inert, but that a later commit will enable it. This logic can be taken out later once we get rid of machine-os-content. --- pkg/controller/common/helpers.go | 28 ++++++++++++++++++---- pkg/controller/common/helpers_test.go | 4 +++- pkg/controller/render/render_controller.go | 11 +++++---- pkg/controller/template/render.go | 8 ++++++- pkg/operator/sync.go | 9 ++++++- 5 files changed, 48 insertions(+), 12 deletions(-) diff --git a/pkg/controller/common/helpers.go b/pkg/controller/common/helpers.go index 1edd3cdc5f..9fd4b5bb03 100644 --- a/pkg/controller/common/helpers.go +++ b/pkg/controller/common/helpers.go @@ -48,6 +48,9 @@ import ( mcfgclientset "github.com/openshift/machine-config-operator/pkg/generated/clientset/versioned" ) +// Gates whether or not the MCO uses the new format base OS container image by default +var UseNewFormatImageByDefault = false + // strToPtr converts the input string to a pointer to itself func strToPtr(s string) *string { return &s @@ -131,18 +134,26 @@ func MergeMachineConfigs(configs []*mcfgv1.MachineConfig, cconfig *mcfgv1.Contro // For layering, we want to let the user override OSImageURL again // The template configs always match what's in controllerconfig because they get rendered from there, // so the only way we get an override here is if the user adds something different - osImageURL := cconfig.Spec.OSImageURL + osImageURL := GetDefaultBaseImageContainer(&cconfig.Spec) for _, cfg := range configs { if cfg.Spec.OSImageURL != "" { osImageURL = cfg.Spec.OSImageURL } } + // Allow overriding the extensions container + baseOSExtensionsContainerImage := cconfig.Spec.BaseOSExtensionsContainerImage + for _, cfg := range configs { + if cfg.Spec.BaseOSExtensionsContainerImage != "" { + baseOSExtensionsContainerImage = cfg.Spec.BaseOSExtensionsContainerImage + } + } + return &mcfgv1.MachineConfig{ Spec: mcfgv1.MachineConfigSpec{ - OSImageURL: osImageURL, - BaseOSExtensionsContainerImage: cconfig.Spec.BaseOSExtensionsContainerImage, - KernelArguments: kargs, + OSImageURL: osImageURL, + BaseOSExtensionsContainerImage: baseOSExtensionsContainerImage, + KernelArguments: kargs, Config: runtime.RawExtension{ Raw: rawOutIgn, }, @@ -970,3 +981,12 @@ func GetLongestValidCertificate(certificateList []*x509.Certificate, subjectPref } return nil } + +// GetDefaultBaseImageContainer is kind of a "soft feature gate" for using the "new format" image by default, its behavior +// is determined by the "UseNewFormatImageByDefault" boolean +func GetDefaultBaseImageContainer(cconfigspec *mcfgv1.ControllerConfigSpec) string { + if UseNewFormatImageByDefault { + return cconfigspec.BaseOSContainerImage + } + return cconfigspec.OSImageURL +} diff --git a/pkg/controller/common/helpers_test.go b/pkg/controller/common/helpers_test.go index 2ad2e84b49..5089b5c1cb 100644 --- a/pkg/controller/common/helpers_test.go +++ b/pkg/controller/common/helpers_test.go @@ -236,6 +236,7 @@ func TestMergeMachineConfigs(t *testing.T) { // variable setup cconfig := &mcfgv1.ControllerConfig{} cconfig.Spec.OSImageURL = "testURL" + cconfig.Spec.BaseOSContainerImage = "newformatURL" fips := true kargs := []string{"testKarg"} extensions := []string{"testExtensions"} @@ -261,7 +262,8 @@ func TestMergeMachineConfigs(t *testing.T) { require.Nil(t, err) expectedMachineConfig := &mcfgv1.MachineConfig{ Spec: mcfgv1.MachineConfigSpec{ - OSImageURL: cconfig.Spec.OSImageURL, + // TODO(jkyros): take this back out when we drop machine-os-content + OSImageURL: GetDefaultBaseImageContainer(&cconfig.Spec), KernelArguments: []string{}, Config: runtime.RawExtension{ Raw: rawOutIgn, diff --git a/pkg/controller/render/render_controller.go b/pkg/controller/render/render_controller.go index dfac75d5c3..fe4afe1bc3 100644 --- a/pkg/controller/render/render_controller.go +++ b/pkg/controller/render/render_controller.go @@ -488,7 +488,7 @@ func (ctrl *Controller) syncGeneratedMachineConfig(pool *mcfgv1.MachineConfigPoo } // Emit event and collect metric when OSImageURL was overridden. - if generated.Spec.OSImageURL != cc.Spec.OSImageURL { + if generated.Spec.OSImageURL != ctrlcommon.GetDefaultBaseImageContainer(&cc.Spec) { ctrlcommon.OSImageURLOverride.WithLabelValues(pool.Name).Set(1) ctrl.eventRecorder.Eventf(generated, corev1.EventTypeNormal, "OSImageURLOverridden", "OSImageURL was overridden via machineconfig in %s (was: %s is: %s)", generated.Name, cc.Spec.OSImageURL, generated.Spec.OSImageURL) } else { @@ -586,10 +586,11 @@ func generateRenderedMachineConfig(pool *mcfgv1.MachineConfigPool, configs []*mc merged.Annotations[ctrlcommon.GeneratedByControllerVersionAnnotationKey] = version.Hash merged.Annotations[ctrlcommon.ReleaseImageVersionAnnotationKey] = cconfig.Annotations[ctrlcommon.ReleaseImageVersionAnnotationKey] - // Make it obvious that the OSImageURL has been overridden. If we log this in MergeMachineConfigs, we don't know the name yet, so we're - // logging out here instead so it's actually helpful. - if merged.Spec.OSImageURL != cconfig.Spec.OSImageURL { - glog.Infof("OSImageURL has been overridden via machineconfig in %s (was: %s is: %s)", merged.Name, cconfig.Spec.OSImageURL, merged.Spec.OSImageURL) + // The operator needs to know the user overrode this, so it knows if it needs to skip the + // OSImageURL check during upgrade -- if the user took over managing OS upgrades this way, + // the operator shouldn't stop the rest of the upgrade from progressing/completing. + if merged.Spec.OSImageURL != ctrlcommon.GetDefaultBaseImageContainer(&cconfig.Spec) { + merged.Annotations[ctrlcommon.OSImageURLOverriddenKey] = "true" } return merged, nil diff --git a/pkg/controller/template/render.go b/pkg/controller/template/render.go index 35a1ea04a4..48368872a5 100644 --- a/pkg/controller/template/render.go +++ b/pkg/controller/template/render.go @@ -293,8 +293,14 @@ func generateMachineConfigForName(config *RenderConfig, role, name, templateDir, if err != nil { return nil, fmt.Errorf("error creating MachineConfig from Ignition config: %w", err) } + + // TODO(jkyros): you might think you can remove this since we override later when we merge + // config, but resourcemerge doesn't blank this field out once it's populated + // so if you end up on a cluster where it was ever populated in this machineconfig, it + // will keep that last value forever once you upgrade...which is a problen now that we allow OSImageURL overrides + // because it will look like an override when it shouldn't be. So don't take this out until you've solved that. // And inject the osimageurl here - mcfg.Spec.OSImageURL = config.OSImageURL + mcfg.Spec.OSImageURL = ctrlcommon.GetDefaultBaseImageContainer(config.ControllerConfigSpec) return mcfg, nil } diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 3a711f03fb..3c1f3d4811 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -692,12 +692,19 @@ func (optr *Operator) syncRequiredMachineConfigPools(_ *renderConfig) error { _, hasRequiredPoolLabel := pool.Labels[requiredForUpgradeMachineConfigPoolLabelKey] if hasRequiredPoolLabel { - _, _, opURL, err := optr.getOsImageURLs(optr.namespace) + newFormatOpURL, _, opURL, err := optr.getOsImageURLs(optr.namespace) if err != nil { glog.Errorf("Error getting configmap osImageURL: %q", err) return false, nil } releaseVersion, _ := optr.vStore.Get("operator") + + // TODO(jkyros): The operator looks at the osimageurl configmap directly, so we can't use + // our centralized default image selection helper, but we can still use the constant. + // This will come out once we drop machine-os-content. + if ctrlcommon.UseNewFormatImageByDefault { + opURL = newFormatOpURL + } if err := isMachineConfigPoolConfigurationValid(pool, version.Hash, releaseVersion, opURL, optr.mcLister.Get); err != nil { lastErr = fmt.Errorf("pool %s has not progressed to latest configuration: %w, retrying", pool.Name, err) syncerr := optr.syncUpgradeableStatus() From e505cb706037c4350696ade6dd712956fef58aca Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Sat, 10 Sep 2022 03:09:33 -0500 Subject: [PATCH 10/15] Enable "use new format image by default" This just flips our "gating boolean" to true so that the MCO will use the new format image by default. --- pkg/controller/common/helpers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/common/helpers.go b/pkg/controller/common/helpers.go index 9fd4b5bb03..3f0d32c792 100644 --- a/pkg/controller/common/helpers.go +++ b/pkg/controller/common/helpers.go @@ -49,7 +49,7 @@ import ( ) // Gates whether or not the MCO uses the new format base OS container image by default -var UseNewFormatImageByDefault = false +var UseNewFormatImageByDefault = true // strToPtr converts the input string to a pointer to itself func strToPtr(s string) *string { From e26e51e4262bfd53234b1e90cf0ea9995d05e0a9 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Tue, 13 Sep 2022 01:58:21 -0500 Subject: [PATCH 11/15] Exclude extensions container for FCOS This excludes all non-base (right now, only extensions) images from image-references if the build is using fcos, because we currently don't ship fcos extensions images, and including them would break the payload. This also removes the baseOSExtensionsContainerImage from the osimagurl configmap so the placeholder value doesn't get passed through. (oc won't rewrite it if it's not in image-references). --- Dockerfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ff144d794b..ab4d4470d4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,14 @@ ARG TAGS="" COPY --from=builder /go/src/github.com/openshift/machine-config-operator/instroot.tar /tmp/instroot.tar RUN cd / && tar xf /tmp/instroot.tar && rm -f /tmp/instroot.tar COPY install /manifests -RUN if [[ "${TAGS}" == "fcos" ]]; then sed -i 's/rhel-coreos-8/fedora-coreos/g' /manifests/*; \ + +RUN if [[ "${TAGS}" == "fcos" ]]; then \ + # comment out non-base/extensions image-references entirely for fcos + sed -i '/- name: rhel-coreos-8-/,+3 s/^/#/' /manifests/image-references && \ + # also remove extensions from the osimageurl configmap (if we don't, oc won't rewrite it, and the placeholder value will survive and get used) + sed -i '/baseOSExtensionsContainerImage:/ s/^/#/' /manifests/0000_80_machine-config-operator_05_osimageurl.yaml && \ + # rewrite image names for fcos + sed -i 's/rhel-coreos-8/fedora-coreos/g' /manifests/* ; \ elif [[ "${TAGS}" == "scos" ]]; then sed -i 's/rhel-coreos-8/centos-stream-coreos-9/g' /manifests/*; fi && \ if ! rpm -q util-linux; then yum install -y util-linux && yum clean all && rm -rf /var/cache/yum/*; fi COPY templates /etc/mcc/templates From b63b3190c5081b04fef3fb0771cdca1d311ed143 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Wed, 14 Sep 2022 11:19:47 -0400 Subject: [PATCH 12/15] MCO-356: daemon/firstboot: Do a secondary in-place update if rpm-ostree is too old xref https://github.com/coreos/rpm-ostree/commit/89f58028f0bea5b6fa59bdb3506078e09957ec00 Change the logic for firstboot OS updates to detect if rpm-ostree is too old to natively fetch a container image on its own. If so, we use `ex deploy-from-self` via podman. Introduce a new `/etc/machine-config-daemon-force-once` file that will cause us to skip validation, and hence we should re-reconcile and attempt to apply the OS upgrade again, this time natively via rpm-ostree. This is needed for scaleup of old nodes, as well as temporarily for 4.11 upgrades. --- go.mod | 2 +- pkg/daemon/daemon.go | 45 +++++++++++++++++++++++++++++++---- pkg/daemon/rpm-ostree.go | 42 ++++++++++++++++++++++++++++++++ pkg/daemon/rpm-ostree_test.go | 27 +++++++++++++++++++++ pkg/daemon/update.go | 31 +++++++++++++++++++++--- 5 files changed, 138 insertions(+), 9 deletions(-) create mode 100644 pkg/daemon/rpm-ostree_test.go diff --git a/go.mod b/go.mod index 961c871047..6deec1fd5d 100644 --- a/go.mod +++ b/go.mod @@ -285,7 +285,7 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.66.6 // indirect gopkg.in/square/go-jose.v2 v2.6.0 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 // indirect honnef.co/go/tools v0.3.3 // indirect k8s.io/apiserver v0.25.1 // indirect diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 91cba1c1de..d480b7895a 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -108,6 +108,9 @@ type Daemon struct { // booting is true when all initial synchronization to the target // machineconfig is done booting bool + // needSecondaryReboot is used for https://issues.redhat.com/browse/MCO-356 + // when we have an old rpm-ostree and need to reboot into the new one. + needSecondaryReboot bool currentConfigPath string @@ -884,6 +887,31 @@ func (dn *Daemon) RunFirstbootCompleteMachineconfig() error { if err != nil { return fmt.Errorf("failed to parse MachineConfig: %w", err) } + newEnough, err := RpmOstreeIsNewEnoughForLayering() + if err != nil { + return err + } + + // If the host isn't new enough to understand the new container model natively, run as a privileged container. + // See https://github.com/coreos/rpm-ostree/pull/3961 and https://issues.redhat.com/browse/MCO-356 + // This currently will incur a double reboot; see https://github.com/coreos/rpm-ostree/issues/4018 + if !newEnough { + dn.logSystem("rpm-ostree is not new enough for new-format image; forcing an update via container and queuing immediate reboot") + err := runCmdSync("systemd-run", "--unit", "machine-config-daemon-update-rpmostree-via-container", "--collect", "--wait", "--", "podman", "run", "--authfile", "/var/lib/kubelet/config.json", "--privileged", "--pid=host", "--net=host", "--rm", "-v", "/:/run/host", mc.Spec.OSImageURL, "rpm-ostree", "ex", "deploy-from-self", "/run/host") + if err != nil { + return err + } + rebootCmd := rebootCommand("extra reboot for in-place update") + if err := rebootCmd.Run(); err != nil { + dn.logSystem("failed to run reboot: %v", err) + return err + } + // wait to be killed via SIGTERM + time.Sleep(defaultRebootTimeout) + return fmt.Errorf("failed to reboot for secondary in-place update") + } + + glog.Info("rpm-ostree has container feature") // Start with an empty config, then add our *booted* osImageURL to // it, reflecting the current machine state. @@ -1407,9 +1435,11 @@ func (dn *Daemon) checkStateOnFirstRun() error { return err } var pendingConfigName, bootID string + var pendingSpecifiesForce bool if pendingState != nil { pendingConfigName = pendingState.Message bootID = pendingState.BootID + pendingSpecifiesForce = pendingState.Force == "true" } // XXX: drop this // we need this compatibility layer for now @@ -1456,7 +1486,7 @@ func (dn *Daemon) checkStateOnFirstRun() error { targetOSImageURL := state.currentConfig.Spec.OSImageURL osMatch := dn.checkOS(targetOSImageURL) if !osMatch { - glog.Infof("Bootstrap pivot required to: %s", targetOSImageURL) + dn.logSystem("Bootstrap pivot required to: %s", targetOSImageURL) // Check to see if we have a layered/new format image isLayeredImage, err := dn.NodeUpdaterClient.IsBootableImage(targetOSImageURL) @@ -1487,7 +1517,7 @@ func (dn *Daemon) checkStateOnFirstRun() error { } return dn.reboot(fmt.Sprintf("Node will reboot into config %v", state.currentConfig.GetName())) } - glog.Info("No bootstrap pivot required; unlinking bootstrap node annotations") + dn.logSystem("No bootstrap pivot required; unlinking bootstrap node annotations") // Rename the bootstrap node annotations; the // currentConfig's osImageURL should now be *truth*. @@ -1532,8 +1562,13 @@ func (dn *Daemon) checkStateOnFirstRun() error { expectedConfig = state.currentConfig } - if forceFileExists() { - glog.Infof("Skipping on-disk validation; %s present", constants.MachineConfigDaemonForceFile) + force := pendingSpecifiesForce || forceFileExists() + if force { + if pendingSpecifiesForce { + dn.logSystem("Skipping on-disk validation; pending config specifies forcing") + } else { + dn.logSystem("Skipping on-disk validation; %s present", constants.MachineConfigDaemonForceFile) + } return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig) } @@ -1543,7 +1578,7 @@ func (dn *Daemon) checkStateOnFirstRun() error { return wErr } - glog.Info("Validated on-disk state") + dn.logSystem("Validated on-disk state") // We've validated state. Now, ensure that node is in desired state var inDesiredConfig bool diff --git a/pkg/daemon/rpm-ostree.go b/pkg/daemon/rpm-ostree.go index 346de6c6e1..76432b60b4 100644 --- a/pkg/daemon/rpm-ostree.go +++ b/pkg/daemon/rpm-ostree.go @@ -14,6 +14,7 @@ import ( "github.com/golang/glog" "github.com/opencontainers/go-digest" pivotutils "github.com/openshift/machine-config-operator/pkg/daemon/pivot/utils" + "gopkg.in/yaml.v2" ) const ( @@ -346,6 +347,47 @@ func (r *RpmOstreeClient) IsBootableImage(imgURL string) (bool, error) { return isBootableImage == "true", nil } +// RpmOstreeIsNewEnoughForLayering returns true if the version of rpm-ostree on the +// host system is new enough for layering. +// VersionData represents the static information about rpm-ostree. +type VersionData struct { + Version string `yaml:"Version"` + Features []string `yaml:"Features"` + Git string `yaml:"Git"` +} + +type RpmOstreeVersionData struct { + Root VersionData `yaml:"rpm-ostree"` +} + +// RpmOstreeVersion returns the running rpm-ostree version number +func rpmOstreeVersion() (*VersionData, error) { + buf, err := runGetOut("rpm-ostree", "--version") + if err != nil { + return nil, err + } + + var q RpmOstreeVersionData + if err := yaml.Unmarshal(buf, &q); err != nil { + return nil, fmt.Errorf("failed to parse `rpm-ostree --version` output: %w", err) + } + + return &q.Root, nil +} + +func RpmOstreeIsNewEnoughForLayering() (bool, error) { + verdata, err := rpmOstreeVersion() + if err != nil { + return false, err + } + for _, v := range verdata.Features { + if v == "container" { + return true, nil + } + } + return false, nil +} + // RebaseLayered rebases system or errors if already rebased func (r *RpmOstreeClient) RebaseLayered(imgURL string) (err error) { glog.Infof("Executing rebase to %s", imgURL) diff --git a/pkg/daemon/rpm-ostree_test.go b/pkg/daemon/rpm-ostree_test.go new file mode 100644 index 0000000000..31a8a10dc9 --- /dev/null +++ b/pkg/daemon/rpm-ostree_test.go @@ -0,0 +1,27 @@ +package daemon + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "gopkg.in/yaml.v2" +) + +func TestParseVersion(t *testing.T) { + verdata := `rpm-ostree: + Version: '2022.10' + Git: 6b302116c969397fd71899e3b9bb3b8c100d1af9 + Features: + - rust + - compose + - rhsm +` + var q RpmOstreeVersionData + if err := yaml.UnmarshalStrict([]byte(verdata), &q); err != nil { + panic(err) + } + + assert.Equal(t, "2022.10", q.Root.Version) + assert.Contains(t, q.Root.Features, "rust") + assert.NotContains(t, q.Root.Features, "container") +} diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index f8fe0aae3d..387972e5b0 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -1908,8 +1908,31 @@ func (dn *Daemon) updateOS(config *mcfgv1.MachineConfig, osImageContentDir strin func (dn *Daemon) updateLayeredOS(config *mcfgv1.MachineConfig) error { newURL := config.Spec.OSImageURL glog.Infof("Updating OS to layered image %s", newURL) - if err := dn.NodeUpdaterClient.RebaseLayered(newURL); err != nil { - return fmt.Errorf("failed to update OS to %s : %w", newURL, err) + + newEnough, err := RpmOstreeIsNewEnoughForLayering() + if err != nil { + return err + } + // If the host isn't new enough to understand the new container model natively, run as a privileged container. + // See https://github.com/coreos/rpm-ostree/pull/3961 and https://issues.redhat.com/browse/MCO-356 + // This currently will incur a double reboot; see https://github.com/coreos/rpm-ostree/issues/4018 + if !newEnough { + dn.logSystem("rpm-ostree is not new enough for layering; forcing an update via container") + err := runCmdSync("systemd-run", "--unit", "machine-config-daemon-update-rpmostree-via-container", "--collect", "--wait", "--", "podman", "run", "--authfile", "/var/lib/kubelet/config.json", "--privileged", "--pid=host", "--net=host", "--rm", "-v", "/:/run/host", newURL, "rpm-ostree", "ex", "deploy-from-self", "/run/host") + if err != nil { + return err + } + dn.needSecondaryReboot = true + // We'll need to do a second reconciliation pass i.e. an extra reboot today, but that won't + // be necessary after we ship the newer rpm-ostree into older releases. + // See also https://github.com/coreos/rpm-ostree/issues/4018 + if err := os.WriteFile(constants.MachineConfigDaemonPersistentForceOnceFile, []byte(""), 0o644); err != nil { + return err + } + } else { + if err := dn.NodeUpdaterClient.RebaseLayered(newURL); err != nil { + return fmt.Errorf("failed to update OS to %s : %w", newURL, err) + } } return nil @@ -1957,6 +1980,7 @@ type journalMsg struct { Message string `json:"MESSAGE,omitempty"` BootID string `json:"BOOT_ID,omitempty"` Pending string `json:"PENDING,omitempty"` + Force string `json:"MCO_FORCE,omitempty"` OldLogger string `json:"OPENSHIFT_MACHINE_CONFIG_DAEMON_LEGACY_LOG_HACK,omitempty"` // unused today } @@ -2020,7 +2044,8 @@ func (dn *Daemon) storePendingState(pending *mcfgv1.MachineConfig, isPending int pendingState.WriteString(fmt.Sprintf(`MESSAGE_ID=%s MESSAGE=%s BOOT_ID=%s -PENDING=%d`, pendingStateMessageID, pending.GetName(), dn.bootID, isPending)) +MCO_FORCE=%v +PENDING=%d`, pendingStateMessageID, pending.GetName(), dn.bootID, dn.needSecondaryReboot, isPending)) logger.Stdin = &pendingState return logger.CombinedOutput() From 9c6b02186896cab6479a29f78b7dd9aebedd372a Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Mon, 19 Sep 2022 00:53:28 -0500 Subject: [PATCH 13/15] Make verification pass if commit hashes match rpm-ostree deploy-from-self does get us to the right image, but the MCO doesn't know that because the OSImageURL/Container Name/Custom Deployment name doesn't get set when it rebases, it just uses the commit hash. This tells the MCO to: - inspect the current image specified by OSImageURL, - grab its commit hash from the labels - compare it to the base commit of what we have on-disk. If they match, we know we are on the correct image, even if we did not get there the usual way. --- pkg/daemon/daemon.go | 25 ++++++++++++++++++++++--- pkg/daemon/rpm-ostree.go | 24 +++++++++++++++++++----- pkg/daemon/update.go | 16 +++++----------- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index d480b7895a..fbce7b3df6 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -64,6 +64,9 @@ type Daemon struct { // bootedOSImageURL is the currently booted URL of the operating system bootedOSImageURL string + // bootedOScommit is the commit hash of the currently booted operating system + bootedOSCommit string + // kubeClient allows interaction with Kubernetes, including the node we are running on. kubeClient kubernetes.Interface @@ -213,6 +216,7 @@ func New( var ( osImageURL string osVersion string + osCommit string err error ) @@ -231,11 +235,11 @@ func New( if err != nil { return nil, fmt.Errorf("error initializing rpm-ostree: %w", err) } - osImageURL, osVersion, err = nodeUpdaterClient.GetBootedOSImageURL() + osImageURL, osVersion, osCommit, err = nodeUpdaterClient.GetBootedOSImageURL() if err != nil { return nil, fmt.Errorf("error reading osImageURL from rpm-ostree: %w", err) } - glog.Infof("Booted osImageURL: %s (%s)", osImageURL, osVersion) + glog.Infof("Booted osImageURL: %s (%s) %s", osImageURL, osVersion, osCommit) } bootID := "" @@ -267,6 +271,7 @@ func New( os: hostos, NodeUpdaterClient: nodeUpdaterClient, bootedOSImageURL: osImageURL, + bootedOSCommit: osCommit, bootID: bootID, exitCh: exitCh, currentConfigPath: currentConfigPath, @@ -1872,7 +1877,7 @@ func (dn *Daemon) validateOnDiskState(currentConfig *mcfgv1.MachineConfig) error // Be sure we're booted into the OS we expect osMatch := dn.checkOS(currentConfig.Spec.OSImageURL) if !osMatch { - return fmt.Errorf("expected target osImageURL %q, have %q", currentConfig.Spec.OSImageURL, dn.bootedOSImageURL) + return fmt.Errorf("expected target osImageURL %q, have %q (%q)", currentConfig.Spec.OSImageURL, dn.bootedOSImageURL, dn.bootedOSCommit) } if dn.os.IsCoreOSVariant() { @@ -1898,6 +1903,20 @@ func (dn *Daemon) checkOS(osImageURL string) bool { return true } + // TODO(jkyros): the header for this functions says "if the digests match" + // so I'm wondering if at one point this used to work this way.... + // TODO(jkyros): and pulling it just to check is so expensive, skopeo works now + // if you use the --no-tags argument (doesn't pull tags) so we should probably just do that + inspection, err := imageInspect(osImageURL) + if err != nil { + glog.Warningf("Unable to check manifest for matching hash: %s", err) + } else if ostreeCommit, ok := inspection.Labels["ostree.commit"]; ok { + if ostreeCommit == dn.bootedOSCommit { + glog.Infof("We are technically in the right image even if the URL doesn't match (%s == %s)", ostreeCommit, osImageURL) + return true + } + } + return dn.bootedOSImageURL == osImageURL } diff --git a/pkg/daemon/rpm-ostree.go b/pkg/daemon/rpm-ostree.go index 76432b60b4..bca0ad4e17 100644 --- a/pkg/daemon/rpm-ostree.go +++ b/pkg/daemon/rpm-ostree.go @@ -38,6 +38,7 @@ type RpmOstreeDeployment struct { OSName string `json:"osname"` Serial int32 `json:"serial"` Checksum string `json:"checksum"` + BaseChecksum string `json:"base-checksum,omitempty"` Version string `json:"version"` Timestamp uint64 `json:"timestamp"` Booted bool `json:"booted"` @@ -68,7 +69,7 @@ type imageInspection struct { type NodeUpdaterClient interface { Initialize() error GetStatus() (string, error) - GetBootedOSImageURL() (string, string, error) + GetBootedOSImageURL() (string, string, string, error) Rebase(string, string) (bool, error) RebaseLayered(string) error IsBootableImage(string) (bool, error) @@ -169,16 +170,21 @@ func (r *RpmOstreeClient) GetStatus() (string, error) { return string(output), nil } -// GetBootedOSImageURL returns the image URL as well as the OSTree version (for logging) +// GetBootedOSImageURL returns the image URL as well as the OSTree version(for logging) and the ostree commit (for comparisons) // Returns the empty string if the host doesn't have a custom origin that matches pivot:// // (This could be the case for e.g. FCOS, or a future RHCOS which comes not-pivoted by default) -func (r *RpmOstreeClient) GetBootedOSImageURL() (string, string, error) { +func (r *RpmOstreeClient) GetBootedOSImageURL() (string, string, string, error) { bootedDeployment, _, err := r.GetBootedAndStagedDeployment() if err != nil { - return "", "", err + return "", "", "", err } + // TODO(jkyros): take this out, I just want to see when/why it's empty? + j, _ := json.MarshalIndent(bootedDeployment, "", " ") + glog.Infof("%s", j) + // the canonical image URL is stored in the custom origin field. + osImageURL := "" if len(bootedDeployment.CustomOrigin) > 0 { if strings.HasPrefix(bootedDeployment.CustomOrigin[0], "pivot://") { @@ -194,7 +200,15 @@ func (r *RpmOstreeClient) GetBootedOSImageURL() (string, string, error) { osImageURL = tokens[1] } } - return osImageURL, bootedDeployment.Version, nil + + // BaseChecksum is populated if the system has been modified in a way that changes the base checksum + // (like via an RPM install) so prefer BaseCheksum that if it's present, otherwise just use Checksum. + baseChecksum := bootedDeployment.Checksum + if bootedDeployment.BaseChecksum != "" { + baseChecksum = bootedDeployment.BaseChecksum + } + + return osImageURL, bootedDeployment.Version, baseChecksum, nil } func podmanInspect(imgURL string) (imgdata *imageInspection, err error) { diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 387972e5b0..d2872d477c 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -1922,17 +1922,11 @@ func (dn *Daemon) updateLayeredOS(config *mcfgv1.MachineConfig) error { if err != nil { return err } - dn.needSecondaryReboot = true - // We'll need to do a second reconciliation pass i.e. an extra reboot today, but that won't - // be necessary after we ship the newer rpm-ostree into older releases. - // See also https://github.com/coreos/rpm-ostree/issues/4018 - if err := os.WriteFile(constants.MachineConfigDaemonPersistentForceOnceFile, []byte(""), 0o644); err != nil { - return err - } - } else { - if err := dn.NodeUpdaterClient.RebaseLayered(newURL); err != nil { - return fmt.Errorf("failed to update OS to %s : %w", newURL, err) - } + + // TODO(jkyros): we don't need to do anything special here if I teach rpm-ostree this is the same container + + } else if err := dn.NodeUpdaterClient.RebaseLayered(newURL); err != nil { + return fmt.Errorf("failed to update OS to %s : %w", newURL, err) } return nil From f7a1c7b2c496477083ffc0f68e8b737aff15bc67 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 20 Sep 2022 08:56:21 -0400 Subject: [PATCH 14/15] daemon: Deduplicate `ex deploy-from-self` paths --- pkg/daemon/daemon.go | 3 +-- pkg/daemon/update.go | 13 ++++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index fbce7b3df6..31d87a5ac5 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -902,8 +902,7 @@ func (dn *Daemon) RunFirstbootCompleteMachineconfig() error { // This currently will incur a double reboot; see https://github.com/coreos/rpm-ostree/issues/4018 if !newEnough { dn.logSystem("rpm-ostree is not new enough for new-format image; forcing an update via container and queuing immediate reboot") - err := runCmdSync("systemd-run", "--unit", "machine-config-daemon-update-rpmostree-via-container", "--collect", "--wait", "--", "podman", "run", "--authfile", "/var/lib/kubelet/config.json", "--privileged", "--pid=host", "--net=host", "--rm", "-v", "/:/run/host", mc.Spec.OSImageURL, "rpm-ostree", "ex", "deploy-from-self", "/run/host") - if err != nil { + if err := dn.InplaceUpdateViaNewContainer(mc.Spec.OSImageURL); err != nil { return err } rebootCmd := rebootCommand("extra reboot for in-place update") diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index d2872d477c..fe415943ea 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -1904,6 +1904,13 @@ func (dn *Daemon) updateOS(config *mcfgv1.MachineConfig, osImageContentDir strin return nil } +// InplaceUpdateViaNewContainer runs rpm-ostree ex deploy-via-self +// via a privileged container. This is needed on firstboot of old +// nodes as well as temporarily for 4.11 -> 4.12 upgrades. +func (dn *Daemon) InplaceUpdateViaNewContainer(target string) error { + return runCmdSync("systemd-run", "--unit", "machine-config-daemon-update-rpmostree-via-container", "--collect", "--wait", "--", "podman", "run", "--authfile", "/var/lib/kubelet/config.json", "--privileged", "--pid=host", "--net=host", "--rm", "-v", "/:/run/host", target, "rpm-ostree", "ex", "deploy-from-self", "/run/host") +} + // updateLayeredOS updates the system OS to the one specified in newConfig func (dn *Daemon) updateLayeredOS(config *mcfgv1.MachineConfig) error { newURL := config.Spec.OSImageURL @@ -1918,13 +1925,9 @@ func (dn *Daemon) updateLayeredOS(config *mcfgv1.MachineConfig) error { // This currently will incur a double reboot; see https://github.com/coreos/rpm-ostree/issues/4018 if !newEnough { dn.logSystem("rpm-ostree is not new enough for layering; forcing an update via container") - err := runCmdSync("systemd-run", "--unit", "machine-config-daemon-update-rpmostree-via-container", "--collect", "--wait", "--", "podman", "run", "--authfile", "/var/lib/kubelet/config.json", "--privileged", "--pid=host", "--net=host", "--rm", "-v", "/:/run/host", newURL, "rpm-ostree", "ex", "deploy-from-self", "/run/host") - if err != nil { + if err := dn.InplaceUpdateViaNewContainer(newURL); err != nil { return err } - - // TODO(jkyros): we don't need to do anything special here if I teach rpm-ostree this is the same container - } else if err := dn.NodeUpdaterClient.RebaseLayered(newURL); err != nil { return fmt.Errorf("failed to update OS to %s : %w", newURL, err) } From 52f0cf0db6d885ec40de2917e8813531a180dfc4 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 20 Sep 2022 09:10:32 -0400 Subject: [PATCH 15/15] daemon: Remove more leftover cruft from double-reboot attempt Not needed anymore. --- pkg/daemon/daemon.go | 14 ++------------ pkg/daemon/update.go | 4 +--- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 31d87a5ac5..4b38d4c482 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -111,9 +111,6 @@ type Daemon struct { // booting is true when all initial synchronization to the target // machineconfig is done booting bool - // needSecondaryReboot is used for https://issues.redhat.com/browse/MCO-356 - // when we have an old rpm-ostree and need to reboot into the new one. - needSecondaryReboot bool currentConfigPath string @@ -1439,11 +1436,9 @@ func (dn *Daemon) checkStateOnFirstRun() error { return err } var pendingConfigName, bootID string - var pendingSpecifiesForce bool if pendingState != nil { pendingConfigName = pendingState.Message bootID = pendingState.BootID - pendingSpecifiesForce = pendingState.Force == "true" } // XXX: drop this // we need this compatibility layer for now @@ -1566,13 +1561,8 @@ func (dn *Daemon) checkStateOnFirstRun() error { expectedConfig = state.currentConfig } - force := pendingSpecifiesForce || forceFileExists() - if force { - if pendingSpecifiesForce { - dn.logSystem("Skipping on-disk validation; pending config specifies forcing") - } else { - dn.logSystem("Skipping on-disk validation; %s present", constants.MachineConfigDaemonForceFile) - } + if forceFileExists() { + dn.logSystem("Skipping on-disk validation; %s present", constants.MachineConfigDaemonForceFile) return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig) } diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index fe415943ea..8bfa1fc0cd 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -1977,7 +1977,6 @@ type journalMsg struct { Message string `json:"MESSAGE,omitempty"` BootID string `json:"BOOT_ID,omitempty"` Pending string `json:"PENDING,omitempty"` - Force string `json:"MCO_FORCE,omitempty"` OldLogger string `json:"OPENSHIFT_MACHINE_CONFIG_DAEMON_LEGACY_LOG_HACK,omitempty"` // unused today } @@ -2041,8 +2040,7 @@ func (dn *Daemon) storePendingState(pending *mcfgv1.MachineConfig, isPending int pendingState.WriteString(fmt.Sprintf(`MESSAGE_ID=%s MESSAGE=%s BOOT_ID=%s -MCO_FORCE=%v -PENDING=%d`, pendingStateMessageID, pending.GetName(), dn.bootID, dn.needSecondaryReboot, isPending)) +PENDING=%d`, pendingStateMessageID, pending.GetName(), dn.bootID, isPending)) logger.Stdin = &pendingState return logger.CombinedOutput()