diff --git a/Gopkg.lock b/Gopkg.lock
index 54a7c89b41..124245eedf 100644
--- a/Gopkg.lock
+++ b/Gopkg.lock
@@ -405,7 +405,7 @@
   revision = "a47c6814c02c8516ee468a2ce68fbb798755a76e"
 
 [[projects]]
-  digest = "1:b76e13ecb05043f1188cc9520f36668560a525ec73fddc75dbc5400a595f1828"
+  digest = "1:5090c23b108ee7237435eb9a8dc32b62e85f3efac66f158334bcc470deec2d75"
   name = "github.com/operator-framework/operator-sdk"
   packages = [
     "internal/pkg/scaffold",
@@ -417,6 +417,7 @@
     "internal/util/projutil",
     "internal/util/yamlutil",
     "pkg/k8sutil",
+    "pkg/leader",
     "pkg/test",
     "version",
   ]
@@ -1211,6 +1212,7 @@
     "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1",
     "github.com/openshift/library-go/pkg/operator/v1helpers",
     "github.com/operator-framework/operator-sdk/pkg/k8sutil",
+    "github.com/operator-framework/operator-sdk/pkg/leader",
     "github.com/operator-framework/operator-sdk/pkg/test",
     "github.com/operator-framework/operator-sdk/version",
     "gopkg.in/yaml.v2",
diff --git a/cmd/manager/main.go b/cmd/manager/main.go
index ab91b9d6af..4a9fa67b8f 100644
--- a/cmd/manager/main.go
+++ b/cmd/manager/main.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"context"
 	"flag"
 	"os"
 	"runtime"
@@ -11,6 +12,7 @@
 	"github.com/openshift/cluster-node-tuning-operator/pkg/controller"
 	"github.com/openshift/cluster-node-tuning-operator/version"
 	"github.com/operator-framework/operator-sdk/pkg/k8sutil"
+	"github.com/operator-framework/operator-sdk/pkg/leader"
 	sdkVersion "github.com/operator-framework/operator-sdk/version"
 	_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
 	"k8s.io/klog"
@@ -51,6 +53,14 @@ func main() {
 		glog.Fatal(err)
 	}
 
+	ctx := context.TODO()
+
+	// Become the leader before proceeding
+	err = leader.Become(ctx, "node-tuning-operator-lock")
+	if err != nil {
+		glog.Fatal(err)
+	}
+
 	// Create a new Cmd to provide shared dependencies and start components
 	mgr, err := manager.New(cfg, manager.Options{Namespace: namespace})
 	if err != nil {
diff --git a/vendor/github.com/operator-framework/operator-sdk/pkg/leader/doc.go b/vendor/github.com/operator-framework/operator-sdk/pkg/leader/doc.go
new file mode 100644
index 0000000000..b88c30a2cc
--- /dev/null
+++ b/vendor/github.com/operator-framework/operator-sdk/pkg/leader/doc.go
@@ -0,0 +1,54 @@
+// Copyright 2018 The Operator-SDK Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+Package leader implements Leader For Life, a simple alternative to lease-based
+leader election.
+
+Both the Leader For Life and lease-based approaches to leader election are
+built on the concept that each candidate will attempt to create a resource with
+the same GVK, namespace, and name. Whichever candidate succeeds becomes the
+leader. The rest receive "already exists" errors and wait for a new
+opportunity.
+
+Leases provide a way to indirectly observe whether the leader still exists. The
+leader must periodically renew its lease, usually by updating a timestamp in
+its lock record. If it fails to do so, it is presumed dead, and a new election
+takes place. If the leader is in fact still alive but unreachable, it is
+expected to gracefully step down. A variety of factors can cause a leader to
+fail at updating its lease, but continue acting as the leader before succeeding
+at stepping down.
+
+In the "leader for life" approach, a specific Pod is the leader. Once
+established (by creating a lock record), the Pod is the leader until it is
+destroyed. There is no possibility for multiple pods to think they are the
+leader at the same time. The leader does not need to renew a lease, consider
+stepping down, or do anything related to election activity once it becomes the
+leader.
+
+The lock record in this case is a ConfigMap whose OwnerReference is set to the
+Pod that is the leader. When the leader is destroyed, the ConfigMap gets
+garbage-collected, enabling a different candidate Pod to become the leader.
+
+Leader for Life requires that all candidate Pods be in the same Namespace. It
+uses the downwards API to determine the pod name, as hostname is not reliable.
+You should run it configured with:
+
+env:
+  - name: POD_NAME
+    valueFrom:
+      fieldRef:
+        fieldPath: metadata.name
+*/
+package leader
diff --git a/vendor/github.com/operator-framework/operator-sdk/pkg/leader/leader.go b/vendor/github.com/operator-framework/operator-sdk/pkg/leader/leader.go
new file mode 100644
index 0000000000..94fa444b09
--- /dev/null
+++ b/vendor/github.com/operator-framework/operator-sdk/pkg/leader/leader.go
@@ -0,0 +1,154 @@
+// Copyright 2018 The Operator-SDK Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package leader
+
+import (
+	"context"
+	"time"
+
+	"github.com/operator-framework/operator-sdk/pkg/k8sutil"
+
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	crclient "sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/config"
+	logf "sigs.k8s.io/controller-runtime/pkg/runtime/log"
+)
+
+var log = logf.Log.WithName("leader")
+
+// maxBackoffInterval defines the maximum amount of time to wait between
+// attempts to become the leader.
+const maxBackoffInterval = time.Second * 16
+
+// Become ensures that the current pod is the leader within its namespace. If
+// run outside a cluster, it will skip leader election and return nil. It
+// continuously tries to create a ConfigMap with the provided name and the
+// current pod set as the owner reference. Only one can exist at a time with
+// the same name, so the pod that successfully creates the ConfigMap is the
+// leader. Upon termination of that pod, the garbage collector will delete the
+// ConfigMap, enabling a different pod to become the leader.
+func Become(ctx context.Context, lockName string) error {
+	log.Info("Trying to become the leader.")
+
+	ns, err := k8sutil.GetOperatorNamespace()
+	if err != nil {
+		if err == k8sutil.ErrNoNamespace {
+			log.Info("Skipping leader election; not running in a cluster.")
+			return nil
+		}
+		return err
+	}
+
+	config, err := config.GetConfig()
+	if err != nil {
+		return err
+	}
+
+	client, err := crclient.New(config, crclient.Options{})
+	if err != nil {
+		return err
+	}
+
+	owner, err := myOwnerRef(ctx, client, ns)
+	if err != nil {
+		return err
+	}
+
+	// check for existing lock from this pod, in case we got restarted
+	existing := &corev1.ConfigMap{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: "v1",
+			Kind:       "ConfigMap",
+		},
+	}
+	key := crclient.ObjectKey{Namespace: ns, Name: lockName}
+	err = client.Get(ctx, key, existing)
+
+	switch {
+	case err == nil:
+		for _, existingOwner := range existing.GetOwnerReferences() {
+			if existingOwner.Name == owner.Name {
+				log.Info("Found existing lock with my name. I was likely restarted.")
+				log.Info("Continuing as the leader.")
+				return nil
+			} else {
+				log.Info("Found existing lock", "LockOwner", existingOwner.Name)
+			}
+		}
+	case apierrors.IsNotFound(err):
+		log.Info("No pre-existing lock was found.")
+	default:
+		log.Error(err, "Unknown error trying to get ConfigMap")
+		return err
+	}
+
+	cm := &corev1.ConfigMap{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: "v1",
+			Kind:       "ConfigMap",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:            lockName,
+			Namespace:       ns,
+			OwnerReferences: []metav1.OwnerReference{*owner},
+		},
+	}
+
+	// try to create a lock
+	backoff := time.Second
+	for {
+		err := client.Create(ctx, cm)
+		switch {
+		case err == nil:
+			log.Info("Became the leader.")
+			return nil
+		case apierrors.IsAlreadyExists(err):
+			log.Info("Not the leader. Waiting.")
+			select {
+			case <-time.After(wait.Jitter(backoff, .2)):
+				if backoff < maxBackoffInterval {
+					backoff *= 2
+				}
+				continue
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		default:
+			log.Error(err, "Unknown error creating ConfigMap")
+			return err
+		}
+	}
+}
+
+// myOwnerRef returns an OwnerReference that corresponds to the pod in which
+// this code is currently running.
+// It expects the environment variable POD_NAME to be set by the downwards API
+func myOwnerRef(ctx context.Context, client crclient.Client, ns string) (*metav1.OwnerReference, error) {
+	myPod, err := k8sutil.GetPod(ctx, client, ns)
+	if err != nil {
+		return nil, err
+	}
+
+	owner := &metav1.OwnerReference{
+		APIVersion: "v1",
+		Kind:       "Pod",
+		Name:       myPod.ObjectMeta.Name,
+		UID:        myPod.ObjectMeta.UID,
+	}
+	return owner, nil
+}
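
For illustration only, not part of this change: a minimal sketch of how the operator could pass a cancellable, signal-aware context to leader.Become instead of context.TODO(), so that a candidate pod blocked in Become's backoff loop exits promptly on termination. The lock name is the one used in cmd/manager/main.go above; everything else is standard library, and error handling is simplified for brevity.

package main

import (
	"context"
	"log"
	"os"
	"os/signal"
	"syscall"

	"github.com/operator-framework/operator-sdk/pkg/leader"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Cancel the context on SIGTERM/SIGINT; Become's retry loop selects on
	// ctx.Done() and returns ctx.Err(), so a non-leader candidate shuts down
	// promptly instead of retrying until the process is killed.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGTERM, os.Interrupt)
	go func() {
		<-sigs
		cancel()
	}()

	if err := leader.Become(ctx, "node-tuning-operator-lock"); err != nil {
		log.Fatal(err)
	}

	// ... continue with manager setup as in cmd/manager/main.go ...
}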