Skip to content

Commit

Permalink
OCPBUGS-1806: Use machines instead of nodes to detect masters
Browse files Browse the repository at this point in the history
Apparently, NodeSpec.ProviderID may not be populated until CAPBM
gets to it, which may not happen if BMO does not start.

Solve this chicken-and-egg problem by listing machines instead.

Also add more logging around this part since it's error-prone.
  • Loading branch information
dtantsur committed Oct 5, 2022
1 parent dbd14b7 commit 0b0f880
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 73 deletions.
16 changes: 8 additions & 8 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,6 @@ rules:
- list
- patch
- watch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
Expand Down Expand Up @@ -106,6 +98,14 @@ rules:
- get
- list
- watch
- apiGroups:
- machine.openshift.io
resources:
- machines
verbs:
- get
- list
- watch
- apiGroups:
- metal3.io
resources:
Expand Down
57 changes: 39 additions & 18 deletions controllers/provisioning_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ import (
"context"
"fmt"
"net"
"net/url"
"os"
"reflect"
"sort"
"strings"
"time"

Expand All @@ -36,6 +37,7 @@ import (
"k8s.io/apimachinery/pkg/selection"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
utilnet "k8s.io/utils/net"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -44,6 +46,7 @@ import (

baremetalv1alpha1 "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
osconfigv1 "github.com/openshift/api/config/v1"
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
osclientset "github.com/openshift/client-go/config/clientset/versioned"
"github.com/openshift/cluster-baremetal-operator/api/v1alpha1"
metal3iov1alpha1 "github.com/openshift/cluster-baremetal-operator/api/v1alpha1"
Expand All @@ -60,6 +63,8 @@ const (
clusterConfigName = "cluster-config-v1"
clusterConfigKey = "install-config"
clusterConfigNamespace = "kube-system"
// Annotation linking a machine to a host
HostAnnotation = "metal3.io/BareMetalHost"
)

// ProvisioningReconciler reconciles a Provisioning object
Expand Down Expand Up @@ -93,8 +98,8 @@ type ensureFunc func(*provisioning.ProvisioningInfo) (bool, error)
// +kubebuilder:rbac:groups=config.openshift.io,resources=infrastructures;infrastructures/status,verbs=get
// +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;list;patch
// +kubebuilder:rbac:groups="",resources=configmaps;secrets;services,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch
// +kubebuilder:rbac:groups=apps,resources=deployments;daemonsets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=machine.openshift.io,resources=machines,verbs=get;list;watch
// +kubebuilder:rbac:groups=metal3.io,resources=provisionings;provisionings/finalizers,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=metal3.io,resources=provisionings/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=metal3.io,resources=baremetalhosts,verbs=get;list;watch;update;patch
Expand Down Expand Up @@ -477,37 +482,44 @@ func (r *ProvisioningReconciler) networkStackFromServiceNetwork(ctx context.Cont
return ns, nil
}

func getHostByProviderId(provId string) string {
if provId == "" {
func getHostByMachine(meta metav1.ObjectMeta) string {
annotations := meta.GetAnnotations()
annotation, ok := annotations[HostAnnotation]
if !ok {
klog.Warningf("Ignoring machine %s without annotation linking it to BareMetalHost", meta.Name)
return ""
}

provider, err := url.Parse(provId)
if err != nil || provider.Scheme != "baremetalhost" {
hostNamespace, hostName, err := cache.SplitMetaNamespaceKey(annotation)
if err != nil {
klog.Warningf("Ignoring machine %s with invalid BareMetalHost link %s", meta.Name, annotation)
return ""
}

path := strings.Split(strings.Trim(provider.Path, "/"), "/")
if len(path) < 2 || path[0] != ComponentNamespace {
if hostNamespace != ComponentNamespace {
klog.Warningf("Ignoring machine %s that is linked to a BareMetalHost in namespace %s", meta.Name, hostNamespace)
return ""
}

return path[1]
return hostName
}

func (r *ProvisioningReconciler) updateProvisioningMacAddresses(ctx context.Context, provConfig *metal3iov1alpha1.Provisioning) error {
if len(provConfig.Spec.ProvisioningMacAddresses) != 0 {
return nil
}

nodes := corev1.NodeList{}
machines := machinev1beta1.MachineList{}
bmhNames := []string{}
labelReq, _ := labels.NewRequirement("node-role.kubernetes.io/master", selection.Exists, nil)
if err := r.Client.List(ctx, &nodes, &client.ListOptions{LabelSelector: labels.NewSelector().Add(*labelReq)}); err != nil {
return errors.Wrap(err, "cannot list master nodes")
labelReq, _ := labels.NewRequirement("machine.openshift.io/cluster-api-machine-role", selection.Equals, []string{"master"})
if err := r.Client.List(ctx, &machines, &client.ListOptions{LabelSelector: labels.NewSelector().Add(*labelReq)}); err != nil {
return errors.Wrap(err, "cannot list master machines")
}
for _, node := range nodes.Items {
bmhName := getHostByProviderId(node.Spec.ProviderID)
if len(machines.Items) < 1 {
return errors.New("machines with cluster-api-machine-role=master not found")
}

for _, machine := range machines.Items {
bmhName := getHostByMachine(machine.ObjectMeta)
if bmhName != "" {
bmhNames = append(bmhNames, bmhName)
}
Expand All @@ -523,8 +535,16 @@ func (r *ProvisioningReconciler) updateProvisioningMacAddresses(ctx context.Cont
macs = append(macs, bmh.Spec.BootMACAddress)
}
}
provConfig.Spec.ProvisioningMacAddresses = macs
return r.Client.Update(ctx, provConfig)

sort.Strings(macs)
if !reflect.DeepEqual(provConfig.Spec.ProvisioningMacAddresses, macs) {
klog.InfoS("Updating provisioning MAC addresses",
"CurrentMacAddresses", provConfig.Spec.ProvisioningMacAddresses, "NewMacAddresses", macs)
provConfig.Spec.ProvisioningMacAddresses = macs
return r.Client.Update(ctx, provConfig)
} else {
return nil
}
}

// SetupWithManager configures the manager to run the controller
Expand Down Expand Up @@ -572,5 +592,6 @@ func (r *ProvisioningReconciler) SetupWithManager(mgr ctrl.Manager) error {
Owns(&appsv1.DaemonSet{}).
Owns(&osconfigv1.ClusterOperator{}).
Owns(&osconfigv1.Proxy{}).
Owns(&machinev1beta1.Machine{}).
Complete(r)
}
67 changes: 28 additions & 39 deletions controllers/provisioning_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

baremetalv1alpha1 "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
configv1 "github.com/openshift/api/config/v1"
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
fakeconfigclientset "github.com/openshift/client-go/config/clientset/versioned/fake"
metal3iov1alpha1 "github.com/openshift/cluster-baremetal-operator/api/v1alpha1"
"github.com/openshift/cluster-baremetal-operator/provisioning"
Expand All @@ -24,6 +25,7 @@ func setUpSchemeForReconciler() *runtime.Scheme {
// we need to add the openshift/api to the scheme to be able to read
// the infrastructure CR
utilruntime.Must(configv1.AddToScheme(scheme))
utilruntime.Must(machinev1beta1.AddToScheme(scheme))
utilruntime.Must(metal3iov1alpha1.AddToScheme(scheme))
utilruntime.Must(baremetalv1alpha1.AddToScheme(scheme))
return scheme
Expand Down Expand Up @@ -215,65 +217,52 @@ func TestUpdateProvisioningMacAddresses(t *testing.T) {
BootMACAddress: "00:3d:25:45:bf:e9",
},
},
&corev1.Node{
&machinev1beta1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
Labels: map[string]string{"node-role.kubernetes.io/master": ""},
},
Spec: corev1.NodeSpec{
ProviderID: "baremetalhost:///openshift-machine-api/test-master-0/something",
Name: "node-0",
Labels: map[string]string{"machine.openshift.io/cluster-api-machine-role": "master"},
Annotations: map[string]string{"metal3.io/BareMetalHost": "openshift-machine-api/test-master-0"},
},
},
&corev1.Node{
&machinev1beta1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "node-1",
Labels: map[string]string{"node-role.kubernetes.io/master": ""},
},
Spec: corev1.NodeSpec{
ProviderID: "provider:///openshift-machine-api/test-worker-0/something",
Name: "node-1",
Labels: map[string]string{"machine.openshift.io/cluster-api-machine-role": "master"},
Annotations: map[string]string{"metal3.io/BareMetalHost": "another-ns/host"},
},
},
&corev1.Node{
&machinev1beta1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "node-2",
Labels: map[string]string{"node-role.kubernetes.io/master": ""},
},
Spec: corev1.NodeSpec{
ProviderID: "baremetalhost:///broken",
Name: "node-2",
Labels: map[string]string{"machine.openshift.io/cluster-api-machine-role": "master"},
Annotations: map[string]string{"metal3.io/BareMetalHost": "invalid"},
},
},
&corev1.Node{
&machinev1beta1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "node-3",
Labels: map[string]string{"node-role.kubernetes.io/master": ""},
},
Spec: corev1.NodeSpec{
ProviderID: "baremetalhost:///openshift-machine-api/test-controlplane-1/something",
Name: "node-3",
Labels: map[string]string{"machine.openshift.io/cluster-api-machine-role": "master"},
Annotations: map[string]string{"metal3.io/BareMetalHost": "openshift-machine-api/test-controlplane-1"},
},
},
&corev1.Node{
&machinev1beta1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "node-4",
Labels: map[string]string{"node-role.kubernetes.io/master": ""},
Labels: map[string]string{"machine.openshift.io/cluster-api-machine-role": "master"},
},
Spec: corev1.NodeSpec{},
},
&corev1.Node{
&machinev1beta1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "node-5",
Labels: map[string]string{"node-role.kubernetes.io/master": ""},
},
Spec: corev1.NodeSpec{
ProviderID: "baremetalhost:///openshift-machine-api/test-master-2/something",
Name: "node-5",
Labels: map[string]string{"machine.openshift.io/cluster-api-machine-role": "master"},
Annotations: map[string]string{"metal3.io/BareMetalHost": "openshift-machine-api/test-master-2"},
},
},
&corev1.Node{
&machinev1beta1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "node-6",
Labels: map[string]string{"node-role.kubernetes.io/worker": ""},
},
Spec: corev1.NodeSpec{
ProviderID: "baremetalhost:///openshift-machine-api/test-worker-1/something",
Name: "node-6",
Labels: map[string]string{"machine.openshift.io/cluster-api-machine-role": "worker"},
Annotations: map[string]string{"metal3.io/BareMetalHost": "openshift-machine-api/test-worker-0"},
},
},
}
Expand Down
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (

baremetalv1alpha1 "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
osconfigv1 "github.com/openshift/api/config/v1"
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
osclientset "github.com/openshift/client-go/config/clientset/versioned"
metal3iov1alpha1 "github.com/openshift/cluster-baremetal-operator/api/v1alpha1"
"github.com/openshift/cluster-baremetal-operator/controllers"
Expand All @@ -49,6 +50,7 @@ func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
utilruntime.Must(metal3iov1alpha1.AddToScheme(scheme))
utilruntime.Must(osconfigv1.AddToScheme(scheme))
utilruntime.Must(machinev1beta1.AddToScheme(scheme))
utilruntime.Must(baremetalv1alpha1.AddToScheme(scheme))

// +kubebuilder:scaffold:scheme
Expand Down
16 changes: 8 additions & 8 deletions manifests/0000_31_cluster-baremetal-operator_05_rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,6 @@ rules:
- list
- patch
- watch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
Expand Down Expand Up @@ -171,6 +163,14 @@ rules:
- get
- list
- watch
- apiGroups:
- machine.openshift.io
resources:
- machines
verbs:
- get
- list
- watch
- apiGroups:
- metal3.io
resources:
Expand Down

1 comment on commit 0b0f880

@aleskandro
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refers to OCPBUGS-2054

Please sign in to comment.