Skip to content

Commit

Permalink
Merge pull request #1167 from tssurya/pre-puller-4.8
Browse files Browse the repository at this point in the history
Bug 1987046: Add pre-puller ds to reduce upgrade downtime
  • Loading branch information
openshift-merge-robot committed Aug 28, 2021
2 parents f7e31c5 + c013bb0 commit c750237
Show file tree
Hide file tree
Showing 5 changed files with 313 additions and 55 deletions.
40 changes: 40 additions & 0 deletions bindata/network/ovn-kubernetes/pre-puller.yaml
@@ -0,0 +1,40 @@
# DaemonSet that pre-pulls the new ovnkube image onto every node during an
# upgrade. The container itself is a no-op (echo + sleep); its only purpose is
# that scheduling it forces the kubelet on each node to pull {{.OvnImage}}
# before the real ovnkube-node daemonset rolls out, reducing per-node downtime.
kind: DaemonSet
apiVersion: apps/v1
metadata:
  name: ovnkube-upgrades-prepuller
  namespace: openshift-ovn-kubernetes
  annotations:
    kubernetes.io/description: |
      This daemonset launches the pre-puller component during upgrades that pulls the image onto the node.
    release.openshift.io/version: "{{.ReleaseVersion}}"
spec:
  selector:
    matchLabels:
      app: ovnkube-upgrades-prepuller
  template:
    metadata:
      labels:
        app: ovnkube-upgrades-prepuller
        component: network
        type: infra
        openshift.io/component: network
        kubernetes.io/os: "linux"
    spec:
      serviceAccountName: ovn-kubernetes-node
      hostNetwork: true
      priorityClassName: "system-node-critical"
      containers:
      # ovnkube-upgrades-prepuller: no-op container that simply pulls the new image during upgrades
      - name: ovnkube-upgrades-prepuller
        image: "{{.OvnImage}}"
        # Always: force the kubelet to contact the registry for the new tag
        # rather than serving a cached image.
        imagePullPolicy: Always
        command:
        - /bin/bash
        - -c
        - |
          echo "$(date -Iseconds) - finished pulling ovnkube-node image."
          sleep infinity
      nodeSelector:
        # NOTE(review): beta.kubernetes.io/os is deprecated in favor of
        # kubernetes.io/os — confirm minimum supported cluster version
        # before switching this selector.
        beta.kubernetes.io/os: "linux"
      tolerations:
      # Tolerate everything: the image must be pre-pulled on every node,
      # including masters and tainted nodes.
      - operator: "Exists"
1 change: 1 addition & 0 deletions pkg/bootstrap/types.go
Expand Up @@ -34,6 +34,7 @@ type OVNBootstrapResult struct {
ExistingNodeDaemonset *appsv1.DaemonSet
GatewayMode string
Platform configv1.PlatformType
PrePullerDaemonset *appsv1.DaemonSet
}

type BootstrapResult struct {
Expand Down
64 changes: 64 additions & 0 deletions pkg/network/ovn_kubernetes.go
Expand Up @@ -175,6 +175,11 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo
updateNode, updateMaster = shouldUpdateOVNKonUpgrade(bootstrapResult.OVN.ExistingNodeDaemonset, bootstrapResult.OVN.ExistingMasterDaemonset, os.Getenv("RELEASE_VERSION"))
}

renderPrePull := false
if updateNode {
updateNode, renderPrePull = shouldUpdateOVNKonPrepull(bootstrapResult.OVN.ExistingNodeDaemonset, bootstrapResult.OVN.PrePullerDaemonset, os.Getenv("RELEASE_VERSION"))
}

// If we need to delay master or node daemonset rollout, then we'll replace the new one with the existing one
if !updateMaster {
us, err := k8s.ToUnstructured(bootstrapResult.OVN.ExistingMasterDaemonset)
Expand All @@ -191,6 +196,11 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo
objs = k8s.ReplaceObj(objs, us)
}

if !renderPrePull {
// remove prepull from the list of objects to render.
objs = k8s.RemoveObjByGroupKindName(objs, "apps", "DaemonSet", "openshift-ovn-kubernetes", "ovnkube-upgrades-prepuller")
}

return objs, nil
}

Expand Down Expand Up @@ -477,13 +487,24 @@ func bootstrapOVN(conf *operv1.Network, kubeClient client.Client) (*bootstrap.Bo
}
}

prePullerDS := &appsv1.DaemonSet{}
nsn = types.NamespacedName{Namespace: "openshift-ovn-kubernetes", Name: "ovnkube-upgrades-prepuller"}
if err := kubeClient.Get(context.TODO(), nsn, prePullerDS); err != nil {
if !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("Failed to retrieve existing prepuller DaemonSet: %w", err)
} else {
prePullerDS = nil
}
}

res := bootstrap.BootstrapResult{
OVN: bootstrap.OVNBootstrapResult{
MasterIPs: ovnMasterIPs,
ClusterInitiator: clusterInitiator,
ExistingMasterDaemonset: masterDS,
ExistingNodeDaemonset: nodeDS,
GatewayMode: gatewayMode,
PrePullerDaemonset: prePullerDS,
},
}
return &res, nil
Expand Down Expand Up @@ -550,6 +571,49 @@ func shouldUpdateOVNKonIPFamilyChange(existingNode, existingMaster *appsv1.Daemo
return true, true
}

// shouldUpdateOVNKonPrepull gates the ovnkube-node daemonset rollout behind a
// no-op image pre-pulling daemonset. When the existing node daemonset is on a
// different release than the one being applied, the pre-puller is rendered
// first; the node rollout is held back until the pre-puller has finished
// rolling out (or stopped progressing) across the cluster.
//
// Returns (updateNode, renderPrepull): whether the node daemonset may be
// updated now, and whether the pre-puller daemonset should be rendered.
func shouldUpdateOVNKonPrepull(existingNode, prePuller *appsv1.DaemonSet, releaseVersion string) (updateNode, renderPrepull bool) {
	const versionAnnotation = "release.openshift.io/version"

	switch {
	case existingNode == nil:
		// Fresh install: nothing is running yet, so there is no downtime to
		// hide and no reason to pre-pull.
		klog.V(3).Infof("Fresh cluster, no need for prepuller")
		return true, false
	case existingNode.GetAnnotations()[versionAnnotation] == releaseVersion:
		// Node daemonset already at the target release; keep updating it so
		// any other drift is reconciled, and drop the pre-puller.
		klog.V(3).Infof("OVN-Kubernetes node is already in the expected release.")
		return true, false
	}

	// An upgrade is needed from here on: hold the node rollout until the
	// pre-puller exists, targets the right release, and has finished.
	if prePuller == nil {
		klog.Infof("Rolling out the no-op prepuller daemonset...")
		return false, true
	}

	// A pre-puller left over from a previous (up/down)grade may point at a
	// stale image; re-render it so it pulls the currently targeted one.
	if prePuller.GetAnnotations()[versionAnnotation] != releaseVersion {
		klog.Infof("Rendering prepuller daemonset to update its image...")
		return false, true
	}

	if daemonSetProgressing(prePuller, true) {
		klog.Infof("Waiting for ovnkube-upgrades-prepuller daemonset to finish pulling the image before updating node")
		return false, true
	}

	klog.Infof("OVN-Kube upgrades-prepuller daemonset rollout complete, now starting node rollouts")
	return true, false
}

// shouldUpdateOVNKonUpgrade determines if we should roll out changes to
// the master and node daemonsets on upgrades. We roll out nodes first,
// then masters. Downgrades, we do the opposite.
Expand Down

0 comments on commit c750237

Please sign in to comment.