Skip to content

Commit

Permalink
... add reason if no hbmh was found
Browse files (browse the repository at this point in the history)
  • Loading branch information
guettli committed Feb 15, 2024
1 parent 8b70456 commit 88bd718
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 19 deletions.
76 changes: 58 additions & 18 deletions pkg/services/baremetal/baremetal/baremetal.go
Expand Up @@ -27,6 +27,9 @@ import (
"strings"
"time"

"golang.org/x/exp/maps"
"golang.org/x/exp/slices"

"github.com/go-logr/logr"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -283,19 +286,19 @@ func (s *Service) associate(ctx context.Context) error {
}

// choose new host
host, helper, err := s.chooseHost(ctx)
host, helper, reason, err := s.chooseHost(ctx)
if err != nil {
return fmt.Errorf("failed to choose host: %w", err)
}
if host == nil {
s.scope.BareMetalMachine.Status.Phase = clusterv1.MachinePhasePending
s.scope.V(1).Info("No available host found. Requeuing.")
s.scope.V(1).Info("No available host found. Requeuing.", "reason", reason)
conditions.MarkFalse(
s.scope.BareMetalMachine,
infrav1.HostAssociateSucceededCondition,
infrav1.NoAvailableHostReason,
clusterv1.ConditionSeverityWarning,
"no available host",
fmt.Sprintf("no available host (%s)", reason),
)
return &scope.RequeueAfterError{RequeueAfter: requeueAfter}
}
Expand Down Expand Up @@ -363,77 +366,114 @@ func (s *Service) getAssociatedHost(ctx context.Context) (*infrav1.HetznerBareMe
return &host, helper, nil
}

func (s *Service) chooseHost(ctx context.Context) (*infrav1.HetznerBareMetalHost, *patch.Helper, error) {
// chooseHost tries to find a free hbmh.
// If no hbmh was found, then hbmh and err are nil, and the string
// "reason" contains human readable details.
func (s *Service) chooseHost(ctx context.Context) (
hbmh *infrav1.HetznerBareMetalHost, patchHelper *patch.Helper, reason string, err error,
) {
// get list of hosts scoped to namespace of machine
hosts := infrav1.HetznerBareMetalHostList{}
opts := &client.ListOptions{
Namespace: s.scope.BareMetalMachine.Namespace,
}

if err := s.scope.Client.List(ctx, &hosts, opts); err != nil {
return nil, nil, fmt.Errorf("failed to list hosts: %w", err)
return nil, nil, "", fmt.Errorf("failed to list hosts: %w", err)
}

labelSelector := s.getLabelSelector()

availableHosts := make([]*infrav1.HetznerBareMetalHost, 0, len(hosts.Items))

mapOfSkipReasons := make(map[string]int)

usableDevices := 0
for i, host := range hosts.Items {
if host.Spec.ConsumerRef != nil && consumerRefMatches(host.Spec.ConsumerRef, s.scope.BareMetalMachine) {
helper, err := patch.NewHelper(&hosts.Items[i], s.scope.Client)
if err != nil {
return nil, nil, fmt.Errorf("failed to create patch helper: %w", err)
return nil, nil, "", fmt.Errorf("failed to create patch helper: %w", err)
}
return &hosts.Items[i], helper, nil
return &hosts.Items[i], helper, "", nil
}
if host.Spec.ConsumerRef != nil {
continue
}
if host.Spec.Status.ProvisioningState != infrav1.StateNone {
continue
}

// from now on each "continue" should add an entry
// to mapOfSkipReasons.
usableDevices++

if host.Spec.RootDeviceHints == nil {
mapOfSkipReasons["hbmh-is-missing-rootDeviceHints"]++
continue
}
if !host.Spec.RootDeviceHints.IsValid() {
mapOfSkipReasons["hbmh-has-invalid-rootDeviceHints"]++
continue
}
if s.scope.BareMetalMachine.Spec.InstallImage.Swraid == 1 {
// Machine should have RAID. Skip machines which have less than two WWNs
if len(host.Spec.RootDeviceHints.Raid.WWN) < 2 {
len_wwns := len(host.Spec.RootDeviceHints.Raid.WWN)
if len_wwns < 2 {
mapOfSkipReasons[fmt.Sprintf("machine-should-use-swraid-but-only-%d-WWN-in-hbmh", len_wwns)]++
continue
}
}
if host.Spec.ConsumerRef != nil {
continue
}
if host.Spec.MaintenanceMode != nil && *host.Spec.MaintenanceMode {
mapOfSkipReasons["hbmh-in-maintenance-mode"]++
continue
}
if host.GetDeletionTimestamp() != nil {
mapOfSkipReasons["hbmh-has-deletion-timestamp"]++
continue
}
if host.Spec.Status.ErrorMessage != "" {
mapOfSkipReasons["hbmh-has-error-message-in-status"]++
continue
}

if !labelSelector.Matches(labels.Set(host.ObjectMeta.Labels)) {
mapOfSkipReasons["label-selector-does-not-match"]++
continue
}

if host.Spec.Status.ProvisioningState != infrav1.StateNone {
availableHosts = append(availableHosts, &hosts.Items[i])
}

reasons := make([]string, 0, len(mapOfSkipReasons))
keys := maps.Keys(mapOfSkipReasons)
slices.Sort(keys)
for _, key := range keys {
value := mapOfSkipReasons[key]
if value == 0 {
continue
}

availableHosts = append(availableHosts, &hosts.Items[i])
reasons = append(reasons, fmt.Sprintf("%s: %d", key, value))
}

if len(availableHosts) == 0 {
return nil, nil, nil
return nil, nil, fmt.Sprintf("No usable device of %d found: %s", usableDevices, strings.Join(reasons, ", ")), nil
}

// choose a host
randomNumber, err := rand.Int(rand.Reader, big.NewInt(int64(len(availableHosts))))
if err != nil {
return nil, nil, fmt.Errorf("failed to create random number: %w", err)
return nil, nil, "", fmt.Errorf("failed to create random number: %w", err)
}

chosenHost := availableHosts[randomNumber.Int64()]

helper, err := patch.NewHelper(chosenHost, s.scope.Client)
if err != nil {
return nil, nil, fmt.Errorf("failed to create patch helper: %w", err)
return nil, nil, "", fmt.Errorf("failed to create patch helper: %w", err)
}

return chosenHost, helper, nil
return chosenHost, helper, "", nil
}

func (s *Service) reconcileLoadBalancerAttachment(ctx context.Context, host *infrav1.HetznerBareMetalHost) error {
Expand Down
52 changes: 51 additions & 1 deletion pkg/services/baremetal/baremetal/baremetal_test.go
Expand Up @@ -51,6 +51,16 @@ var _ = Describe("chooseHost", func() {
},
}

// bmMachineWithRAID is a bare metal machine whose install image requests
// software RAID (Swraid: 1). It is passed to newTestService by the
// "chooseHostWithRAID" table so that chooseHost applies its RAID/WWN check.
bmMachineWithRAID := &infrav1.HetznerBareMetalMachine{
TypeMeta: metav1.TypeMeta{},
ObjectMeta: metav1.ObjectMeta{Name: "bm-machine-raid", Namespace: defaultNamespace},
Spec: infrav1.HetznerBareMetalMachineSpec{
InstallImage: infrav1.InstallImage{
Swraid: 1,
},
},
}

hostWithCorrectConsumerRef := infrav1.HetznerBareMetalHost{
ObjectMeta: metav1.ObjectMeta{
Name: "hostWithCorrectConsumerRef",
Expand Down Expand Up @@ -204,10 +214,22 @@ var _ = Describe("chooseHost", func() {
},
}

// hostWithInvalidRAIDConfig is an unconsumed host in provisioning state
// "none" whose RootDeviceHints are set but empty, so it lists no RAID WWNs.
// The "chooseHostWithRAID" table expects it to be skipped with the
// "only-0-WWN" reason when the machine requests software RAID.
hostWithInvalidRAIDConfig := infrav1.HetznerBareMetalHost{
ObjectMeta: metav1.ObjectMeta{
Name: "hostWithInvalidRAIDConfig",
Namespace: defaultNamespace,
},
Spec: infrav1.HetznerBareMetalHostSpec{
RootDeviceHints: &infrav1.RootDeviceHints{},
Status: infrav1.ControllerGeneratedStatus{ProvisioningState: infrav1.StateNone},
},
}

type testCaseChooseHost struct {
Hosts []client.Object
HostSelector infrav1.HostSelector
ExpectedHostName string
ExpectedReason string
}
DescribeTable("chooseHost",
func(tc testCaseChooseHost) {
Expand All @@ -217,8 +239,9 @@ var _ = Describe("chooseHost", func() {
bmMachine.Spec.HostSelector = tc.HostSelector
service := newTestService(bmMachine, c)

host, _, err := service.chooseHost(context.TODO())
host, _, reason, err := service.chooseHost(context.TODO())
Expect(err).To(Succeed())
Expect(reason).To(Equal(tc.ExpectedReason))
if tc.ExpectedHostName == "" {
Expect(host).To(BeNil())
} else {
Expand Down Expand Up @@ -275,6 +298,33 @@ var _ = Describe("chooseHost", func() {
}},
ExpectedHostName: "hostWithLabel",
}),
Entry("Choosing no host, because RAID config is invalid",
testCaseChooseHost{
Hosts: []client.Object{&hostWithLabel, &hostWithOtherLabel, &hostWithLabelAndMaintenanceMode, &host},
HostSelector: infrav1.HostSelector{MatchExpressions: []infrav1.HostSelectorRequirement{
{Key: "key", Operator: selection.In, Values: []string{"value", "value2"}},
}},
ExpectedHostName: "hostWithLabel",
}),
)
// chooseHostWithRAID runs chooseHost for bmMachineWithRAID (Swraid: 1)
// against a fake client pre-loaded with the hosts of the test case.
// Every entry in this table expects that no host is chosen: the table
// function asserts a nil host and the exact reason string, and does not
// read tc.ExpectedHostName or tc.HostSelector.
DescribeTable("chooseHostWithRAID",
func(tc testCaseChooseHost) {
scheme := runtime.NewScheme()
utilruntime.Must(infrav1.AddToScheme(scheme))
c := fakeclient.NewClientBuilder().WithScheme(scheme).WithObjects(tc.Hosts...).Build()
service := newTestService(bmMachineWithRAID, c)

host, _, reason, err := service.chooseHost(context.TODO())
// Not finding a host is not an error; the details are in reason.
Expect(err).To(Succeed())
Expect(reason).To(Equal(tc.ExpectedReason))
Expect(host).To(BeNil())
},
// The expected reason counts only one usable device although two hosts
// are supplied. NOTE(review): presumably `host` (defined outside this
// hunk) is consumed or not in state "none" — confirm against its definition.
Entry("No host, because invalid RAID config",
testCaseChooseHost{
Hosts: []client.Object{&host, &hostWithInvalidRAIDConfig},
ExpectedHostName: "",
ExpectedReason: "No usable device of 1 found: machine-should-use-swraid-but-only-0-WWN-in-hbmh: 1",
}),
)
})

Expand Down

0 comments on commit 88bd718

Please sign in to comment.