Skip to content

Commit

Permalink
baremetal: monitor bootstrap process
Browse files Browse the repository at this point in the history
  • Loading branch information
honza committed Apr 3, 2024
1 parent 9c98f76 commit c1e81b3
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 5 deletions.
22 changes: 17 additions & 5 deletions cmd/openshift-install/create.go
Expand Up @@ -52,6 +52,7 @@ import (
"github.com/openshift/installer/pkg/types/baremetal"
"github.com/openshift/installer/pkg/types/gcp"
"github.com/openshift/installer/pkg/types/vsphere"
baremetalutils "github.com/openshift/installer/pkg/utils/baremetal"
cov1helpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers"
"github.com/openshift/library-go/pkg/route/routeapihelpers"
)
Expand Down Expand Up @@ -435,7 +436,7 @@ func waitForBootstrapComplete(ctx context.Context, config *rest.Config) *cluster
return newAPIError(err)
}

if err := waitForBootstrapConfigMap(ctx, client); err != nil {
if err := waitForBootstrapConfigMap(ctx, config, client); err != nil {
return err
}

Expand All @@ -449,18 +450,22 @@ func waitForBootstrapComplete(ctx context.Context, config *rest.Config) *cluster
// waitForBootstrapConfigMap watches the configmaps in the kube-system namespace
// and waits for the bootstrap configmap to report that bootstrapping has
// completed.
func waitForBootstrapConfigMap(ctx context.Context, client *kubernetes.Clientset) *clusterCreateError {
func waitForBootstrapConfigMap(ctx context.Context, config *rest.Config, client *kubernetes.Clientset) *clusterCreateError {
timeout := 30 * time.Minute

platformName := ""

// Wait longer for baremetal, VSphere due to length of time it takes to boot
if assetStore, err := assetstore.NewStore(command.RootOpts.Dir); err == nil {
if installConfig, err := assetStore.Load(&installconfig.InstallConfig{}); err == nil && installConfig != nil {
if installConfig.(*installconfig.InstallConfig).Config.Platform.Name() == baremetal.Name || installConfig.(*installconfig.InstallConfig).Config.Platform.Name() == vsphere.Name {
timeout = 60 * time.Minute
}
platformName = installConfig.(*installconfig.InstallConfig).Config.Platform.Name()
}
}

if platformName == baremetal.Name || platformName == vsphere.Name {
timeout = 60 * time.Minute
}

untilTime := time.Now().Add(timeout)
timezone, _ := untilTime.Zone()
logrus.Infof("Waiting up to %v (until %v %s) for bootstrapping to complete...",
Expand All @@ -469,6 +474,13 @@ func waitForBootstrapConfigMap(ctx context.Context, client *kubernetes.Clientset
waitCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()

// baremetal: monitor control plane bootstrapping progress
if platformName == baremetal.Name {
if err := baremetalutils.WaitForBaremetalBootstrapControlPlane(waitCtx, config); err != nil {
return newBootstrapError(err)
}
}

_, err := clientwatch.UntilWithSync(
waitCtx,
cache.NewListWatchFromClient(client.CoreV1().RESTClient(), "configmaps", "kube-system", fields.OneTermEqualSelector("metadata.name", "bootstrap")),
Expand Down
7 changes: 7 additions & 0 deletions pkg/utils/baremetal/OWNERS
@@ -0,0 +1,7 @@
# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md
# This file just uses aliases defined in OWNERS_ALIASES.

approvers:
- baremetal-approvers
reviewers:
- baremetal-reviewers
104 changes: 104 additions & 0 deletions pkg/utils/baremetal/bootstrap.go
@@ -0,0 +1,104 @@
package baremetal

import (
"context"
"fmt"
"time"

baremetalhost "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/rest"
clientwatch "k8s.io/client-go/tools/watch"
)

// WaitForBaremetalBootstrapControlPlane watches the baremetalhost resources in
// the openshift-machine-api namespace, using a dynamic client built from
// config, and blocks until every host labelled
// installer.openshift.io/role=control-plane seen so far reports the
// provisioned state (at least one such host must have been observed).
//
// Whenever a control-plane host is first seen or its provisioning state
// changes, the new state is logged at info level; if the host is in the error
// operational status at that moment, its error type and message are logged as
// a warning.
//
// The wait runs under a 30 minute timeout derived from ctx. An error is
// returned when the dynamic client cannot be created, when a watch event
// cannot be converted into a BareMetalHost, or when clientwatch.UntilWithSync
// itself returns an error.
func WaitForBaremetalBootstrapControlPlane(ctx context.Context, config *rest.Config) error {
	timeout := 30 * time.Minute

	client, err := dynamic.NewForConfig(config)
	if err != nil {
		return fmt.Errorf("creating a baremetal client: %w", err)
	}

	r := client.Resource(baremetalhost.GroupVersion.WithResource("baremetalhosts")).Namespace("openshift-machine-api")
	blw := BmhCacheListerWatcher{
		Resource: r,
		Context:  ctx,
	}

	untilTime := time.Now().Add(timeout)
	timezone, _ := untilTime.Zone()
	logrus.Infof("Waiting up to %v (until %v %s) for baremetal control plane to provision...",
		timeout, untilTime.Format(time.Kitchen), timezone)

	waitCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	// masters holds the most recent copy of every control-plane host observed,
	// keyed by host name.
	masters := map[string]baremetalhost.BareMetalHost{}

	_, err = clientwatch.UntilWithSync(
		waitCtx,
		blw,
		&unstructured.Unstructured{},
		nil,
		func(event watch.Event) (bool, error) {
			// Only additions and modifications are inspected; every other
			// event type leaves the recorded hosts untouched.
			switch event.Type {
			case watch.Added, watch.Modified:
			default:
				return false, nil
			}

			unstr, err := runtime.DefaultUnstructuredConverter.ToUnstructured(event.Object)
			if err != nil {
				return false, fmt.Errorf("converting watch event object to unstructured: %w", err)
			}

			bmh := &baremetalhost.BareMetalHost{}
			if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstr, bmh); err != nil {
				// The error is returned with context instead of being logged
				// here as well, so it is reported exactly once by the caller.
				return false, fmt.Errorf("converting unstructured object to baremetalhost: %w", err)
			}

			if bmh.Labels["installer.openshift.io/role"] == "control-plane" {
				prev, seen := masters[bmh.Name]

				// Log only on first sight or on a provisioning state change
				// so repeated updates do not flood the output.
				if !seen || bmh.Status.Provisioning.State != prev.Status.Provisioning.State {
					if bmh.Status.Provisioning.State == baremetalhost.StateNone {
						// StateNone is an empty string
						logrus.Infof("  baremetalhost: %s: uninitialized", bmh.Name)
					} else {
						logrus.Infof("  baremetalhost: %s: %s", bmh.Name, bmh.Status.Provisioning.State)
					}

					if bmh.Status.OperationalStatus == baremetalhost.OperationalStatusError {
						logrus.Warnf("  baremetalhost: %s: error: %s %s", bmh.Name, bmh.Status.ErrorType, bmh.Status.ErrorMessage)
					}
				}

				masters[bmh.Name] = *bmh
			}

			// Nothing to conclude until at least one control-plane host is known.
			if len(masters) == 0 {
				return false, nil
			}

			// Keep waiting while any known control-plane host is not provisioned.
			for _, master := range masters {
				if master.Status.Provisioning.State != baremetalhost.StateProvisioned {
					return false, nil
				}
			}

			return true, nil
		},
	)

	return err
}
47 changes: 47 additions & 0 deletions pkg/utils/baremetal/cache.go
@@ -0,0 +1,47 @@
package baremetal

import (
"context"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/dynamic"
)

// BmhCacheListerWatcher is an object that wraps the listing and watching
// functionality for baremetal host resources.
type BmhCacheListerWatcher struct {
// Resource is the dynamic resource client that List and Watch delegate to.
Resource dynamic.ResourceInterface
// Context is the context passed to every List and Watch call made on Resource.
Context context.Context
}

// List asks the wrapped dynamic resource client for the baremetal hosts
// matching options, using the stored Context.
//
// When the call fails with the message "the server could not find the
// requested resource", an empty list and a nil error are returned instead
// (presumably the baremetalhost resource type is not being served yet —
// confirm). Any other outcome is passed through to the caller unchanged.
func (bc BmhCacheListerWatcher) List(options metav1.ListOptions) (runtime.Object, error) {
	hosts, err := bc.Resource.List(bc.Context, options)
	if err != nil && err.Error() == "the server could not find the requested resource" {
		return &unstructured.UnstructuredList{}, nil
	}
	return hosts, err
}

// Watch opens a watch on baremetal hosts through the wrapped dynamic resource
// client, using the stored Context.
//
// When the call fails with the message "the server could not find the
// requested resource", a fake watcher is returned with a nil error; that
// watcher is stopped two seconds later. Any other outcome is passed through
// to the caller unchanged.
func (bc BmhCacheListerWatcher) Watch(options metav1.ListOptions) (watch.Interface, error) {
	watcher, err := bc.Resource.Watch(bc.Context, options)
	if err == nil || err.Error() != "the server could not find the requested resource" {
		return watcher, err
	}

	// We can't use watch.NewEmptyWatch here because it closes too quickly.
	placeholder := watch.NewFake()
	time.AfterFunc(2*time.Second, placeholder.Stop)
	return placeholder, nil
}

0 comments on commit c1e81b3

Please sign in to comment.