Skip to content

Commit

Permalink
baremetal: monitor bootstrap process
Browse files Browse the repository at this point in the history
  • Loading branch information
honza committed Apr 6, 2024
1 parent 9c98f76 commit c2dfbe9
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 6 deletions.
26 changes: 20 additions & 6 deletions cmd/openshift-install/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ import (
"github.com/openshift/installer/pkg/types/baremetal"
"github.com/openshift/installer/pkg/types/gcp"
"github.com/openshift/installer/pkg/types/vsphere"
baremetalutils "github.com/openshift/installer/pkg/utils/baremetal"
cov1helpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers"
"github.com/openshift/library-go/pkg/route/routeapihelpers"
)
Expand Down Expand Up @@ -435,7 +436,7 @@ func waitForBootstrapComplete(ctx context.Context, config *rest.Config) *cluster
return newAPIError(err)
}

if err := waitForBootstrapConfigMap(ctx, client); err != nil {
if err := waitForBootstrapConfigMap(ctx, config, client); err != nil {
return err
}

Expand All @@ -449,18 +450,22 @@ func waitForBootstrapComplete(ctx context.Context, config *rest.Config) *cluster
// waitForBootstrapConfigMap watches the configmaps in the kube-system namespace
// and waits for the bootstrap configmap to report that bootstrapping has
// completed.
func waitForBootstrapConfigMap(ctx context.Context, client *kubernetes.Clientset) *clusterCreateError {
func waitForBootstrapConfigMap(ctx context.Context, config *rest.Config, client *kubernetes.Clientset) *clusterCreateError {
timeout := 30 * time.Minute

// Wait longer for baremetal, VSphere due to length of time it takes to boot
platformName := ""

if assetStore, err := assetstore.NewStore(command.RootOpts.Dir); err == nil {
if installConfig, err := assetStore.Load(&installconfig.InstallConfig{}); err == nil && installConfig != nil {
if installConfig.(*installconfig.InstallConfig).Config.Platform.Name() == baremetal.Name || installConfig.(*installconfig.InstallConfig).Config.Platform.Name() == vsphere.Name {
timeout = 60 * time.Minute
}
platformName = installConfig.(*installconfig.InstallConfig).Config.Platform.Name()
}
}

// Wait longer for baremetal, VSphere due to length of time it takes to boot
if platformName == baremetal.Name || platformName == vsphere.Name {
timeout = 60 * time.Minute
}

untilTime := time.Now().Add(timeout)
timezone, _ := untilTime.Zone()
logrus.Infof("Waiting up to %v (until %v %s) for bootstrapping to complete...",
Expand All @@ -469,6 +474,15 @@ func waitForBootstrapConfigMap(ctx context.Context, client *kubernetes.Clientset
waitCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()

// baremetal: monitor control plane bootstrapping progress
if platformName == baremetal.Name {
if err := baremetalutils.WaitForBaremetalBootstrapControlPlane(waitCtx, config); err != nil {
return newBootstrapError(err)
}

logrus.Infof(" Baremetal control plane finished provisioning.")
}

_, err := clientwatch.UntilWithSync(
waitCtx,
cache.NewListWatchFromClient(client.CoreV1().RESTClient(), "configmaps", "kube-system", fields.OneTermEqualSelector("metadata.name", "bootstrap")),
Expand Down
7 changes: 7 additions & 0 deletions pkg/utils/baremetal/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md
# This file just uses aliases defined in OWNERS_ALIASES.

approvers:
- baremetal-approvers
reviewers:
- baremetal-reviewers
95 changes: 95 additions & 0 deletions pkg/utils/baremetal/bootstrap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package baremetal

import (
"context"
"fmt"

baremetalhost "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/rest"
clientwatch "k8s.io/client-go/tools/watch"
)

// WaitForBaremetalBootstrapControlPlane watches baremetalhost resources in the
// openshift-machine-api namespace, using a dynamic client built from config,
// and waits until every observed host labeled
// installer.openshift.io/role=control-plane reports the provisioned state.
//
// Each change of a control plane host's provisioning state is logged at info
// level; a host whose operational status is "error" additionally gets a
// warning with its error type and message.
//
// ctx is handed to clientwatch.UntilWithSync, whose error is returned as-is.
// An error is also returned when the dynamic client cannot be created or when
// a watch event object cannot be converted into a BareMetalHost.
func WaitForBaremetalBootstrapControlPlane(ctx context.Context, config *rest.Config) error {
	client, err := dynamic.NewForConfig(config)
	if err != nil {
		return fmt.Errorf("creating a baremetal client: %w", err)
	}

	r := client.Resource(baremetalhost.GroupVersion.WithResource("baremetalhosts")).Namespace("openshift-machine-api")
	blw := BmhCacheListerWatcher{
		Resource:   r,
		RetryWatch: true,
	}

	logrus.Infof(" Waiting for baremetal control plane to provision...")

	// masters holds the most recently observed copy of every control plane
	// host, keyed by name. It is used both to detect state transitions and to
	// decide when all hosts are provisioned.
	masters := map[string]baremetalhost.BareMetalHost{}

	_, err = clientwatch.UntilWithSync(
		ctx,
		blw,
		&unstructured.Unstructured{},
		nil,
		func(event watch.Event) (bool, error) {
			// Only additions and modifications carry a host state worth
			// inspecting; every other event type is ignored.
			switch event.Type {
			case watch.Added, watch.Modified:
			default:
				return false, nil
			}

			bmh := &baremetalhost.BareMetalHost{}

			// The event object is round-tripped through its unstructured
			// form to obtain a typed BareMetalHost.
			unstr, err := runtime.DefaultUnstructuredConverter.ToUnstructured(event.Object)
			if err != nil {
				return false, fmt.Errorf("converting watch event object to unstructured: %w", err)
			}

			// The error is wrapped and returned rather than logged here, so
			// that it is reported exactly once by the caller.
			if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstr, bmh); err != nil {
				return false, fmt.Errorf("converting unstructured object to baremetalhost: %w", err)
			}

			role, found := bmh.Labels["installer.openshift.io/role"]

			if found && role == "control-plane" {
				prev, seen := masters[bmh.Name]

				// Log only on first sight of a host or when its provisioning
				// state differs from the previously recorded one.
				if !seen || bmh.Status.Provisioning.State != prev.Status.Provisioning.State {
					if bmh.Status.Provisioning.State == baremetalhost.StateNone {
						// StateNone is an empty string
						logrus.Infof(" baremetalhost: %s: uninitialized", bmh.Name)
					} else {
						logrus.Infof(" baremetalhost: %s: %s", bmh.Name, bmh.Status.Provisioning.State)
					}

					if bmh.Status.OperationalStatus == baremetalhost.OperationalStatusError {
						logrus.Warnf(" baremetalhost: %s: %s: %s", bmh.Name, bmh.Status.ErrorType, bmh.Status.ErrorMessage)
					}
				}

				masters[bmh.Name] = *bmh
			}

			// Nothing to conclude until at least one control plane host has
			// been observed.
			if len(masters) == 0 {
				return false, nil
			}

			for _, master := range masters {
				if master.Status.Provisioning.State != baremetalhost.StateProvisioned {
					return false, nil
				}
			}

			return true, nil
		},
	)

	return err
}
49 changes: 49 additions & 0 deletions pkg/utils/baremetal/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package baremetal

import (
"context"
"time"

"github.com/sirupsen/logrus"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/dynamic"
)

// BmhCacheListerWatcher is an object that wraps the listing and watching
// functionality for baremetal host resources.
type BmhCacheListerWatcher struct {
	// Resource is the dynamic resource interface that List and Watch
	// delegate to.
	Resource dynamic.ResourceInterface
	// RetryWatch, when true, makes Watch return an empty watcher instead of
	// an error if the resource is reported as not found.
	RetryWatch bool
}

// List returns a list of baremetal hosts as dynamic objects.
//
// When the API reports the resource as not found, an empty list and a nil
// error are returned so that the caller can keep retrying. Any other error is
// returned together with a nil object.
//
// The method signature carries no context, so context.TODO() is used for the
// underlying request.
func (bc BmhCacheListerWatcher) List(options metav1.ListOptions) (runtime.Object, error) {
	list, err := bc.Resource.List(context.TODO(), options)

	switch {
	case apierrors.IsNotFound(err):
		logrus.Debug(" baremetalhost resource not yet available, will retry")
		return &unstructured.UnstructuredList{}, nil
	case err != nil:
		// Return a literal nil rather than the (possibly nil) typed list
		// pointer, so the returned interface value is genuinely nil.
		return nil, err
	}

	return list, nil
}

// Watch opens a watch on baremetal hosts through the wrapped resource
// interface.
//
// If the resource is reported as not found and RetryWatch is set, an empty
// watcher is returned with a nil error after a one second pause; otherwise the
// watcher and error from the underlying call are passed through unchanged.
func (bc BmhCacheListerWatcher) Watch(options metav1.ListOptions) (watch.Interface, error) {
	watcher, err := bc.Resource.Watch(context.TODO(), options)

	// Anything other than "not found with retry enabled" is handed straight
	// back to the caller.
	if !apierrors.IsNotFound(err) || !bc.RetryWatch {
		return watcher, err
	}

	logrus.Debug(" baremetalhost resource not yet available, will retry")

	// The resource is not installed yet. Supplying an empty watcher
	// encourages the caller to keep retrying. UntilWithSync also checks how
	// long it takes to create the watch, so an artificial delay of one second
	// is introduced to avoid errors.
	empty := watch.NewEmptyWatch()
	time.Sleep(time.Second)

	return empty, nil
}

0 comments on commit c2dfbe9

Please sign in to comment.