openshift · fabianvf · Mar 11, 2019 · Nov 2, 2018 · Nov 5, 2018 · Nov 16, 2018
diff --git a/doc/proposals/apply.md b/doc/proposals/apply.md
@@ -0,0 +1,52 @@
+# Add Apply to Python Client
+
+## Problem
+   Current merge/patch strategy in the Ansible modules is not sufficient for a variety of reasons.
+   1. There is no way to remove a field from the Ansible module without doing a full replace on the object
+   2. The idempotence of the module is entirely reliant on the idempotence of the server. If a task to create a resource is run twice, two `GET`s and
+   two `PATCH`es will be issued, even if there was no content change. This is particularly an issue for the
+   Operater use-case, as tasks will be run repeatedly every 8 seconds or so, and the `PATCH` requests are not
+   cacheable.
+   3. There are several core resources that a blind, full-object patch does not work well with, including Deployments
+   or any resource that has a `ports` field, as the default merge strategy of `strategic-merge-patch` is bugged
+   on the server for those resources.
+   4. The `strategic-merge-patch` also does not work at all for CustomResources, requiring the user to add a directive to use a different patch strategy for CustomResource operations.
+
+
+## Proposal
+   Mimic the logic from `kubectl apply` to allow more intelligent patch strategies in the client. This implementation would be a stop-gap, as `apply` is slated to move to the server at some undefined point in the future, at which point we would probably just use the server-side apply.
+
+   `kubectl apply` makes use of a fairly complex 3-way diffing algorithm to generate a patch that performs deletions between the new configuration and the `LastAppliedConfiguration` (stored in the `kubectl.kubernetes.io/last-applied-configuration` annotation), while preserving non-conflicting additions to the real state of the object (ie, system defaulting or manual changes made to pieces of the spec).
+
+   This algorithm is fairly well-documented and cleanly written, but has a large number of edge cases and a fairly high branching factor.
+
+   The basic algorithm is as follows:
+
+   Given: a desired definition for an object to apply to the cluster
+
+   1. `GET` the current state of the object from Kubernetes
+   2. Pull the `LastAppliedConfiguration` from the current object
+   3. If the `LastAppliedConfiguration` does not exist, diff the desired definition and the current object and send the resulting patch to the server.
+   4. Otherwise, diff the `LastAppliedConfiguration` and the desired definition to compute the deletions. 
+   5. diff the current object and the desired definition to get the delta
+   6. Combine the deletions (from step 4) and the delta (from step 5) to a single patch
+   7. If the patch is not empty, send it to the server
+
+   Much of the complexity comes from diffing complex nested objects, and a variety of patch strategies that may be used (ie, adding vs replacing lists, reordering of lists or keys, deep vs shallow object comparisons)
+
+   Resources for golang implementation:
+   - https://kubernetes.io/docs/concepts/cluster-administration/manage-deployment/#kubectl-apply
+   - https://github.com/kubernetes/kubernetes/blob/master/pkg/kubectl/cmd/apply/apply.go
+   - https://github.com/kubernetes/kubernetes/blob/master/pkg/kubectl/apply.go
+   - https://github.com/kubernetes/apimachinery/blob/master/pkg/util/strategicpatch/patch.go#L2011
+
+
+
+## Estimates
+  Based on a cursory exploration of the golang implementation of `apply`:
+
+  - Up to 1 week for a basic prototype implementation, that can handle the majority of definitions
+  - 1 week to test/harden against edge cases
+  - 1 week to update and test the Ansible modules. This week may overlap with the hardening/testing period, since the process of integration will likely reveal many common edge-cases
+
+  Total: 2-3 weeks. I would be surprised if it took any more time than that for a working implementation.
diff --git a/openshift/dynamic/apply.py b/openshift/dynamic/apply.py
@@ -0,0 +1,91 @@
+from copy import deepcopy
+import json
+
+from openshift.dynamic.exceptions import NotFoundError
+
+LAST_APPLIED_CONFIG_ANNOTATION = 'kubectl.kubernetes.io/last-applied-configuration'
+
+def apply(resource, definition):
+    desired_annotation = dict(
+        metadata=dict(
+            annotations={
+                LAST_APPLIED_CONFIG_ANNOTATION: json.dumps(definition, separators=(',', ':'), indent=None)
+            }
+        )
+    )
+    try:
+        actual = resource.get(name=definition['metadata']['name'], namespace=definition['metadata']['namespace'])
+    except NotFoundError:
+        return resource.create(body=dict_merge(definition, desired_annotation), namespace=definition['metadata']['namespace'])
+    last_applied = actual.metadata.get('annotations',{}).get(LAST_APPLIED_CONFIG_ANNOTATION)
+
+    if last_applied:
+        last_applied = json.loads(last_applied)
+        actual_dict = actual.to_dict()
+        del actual_dict['metadata']['annotations'][LAST_APPLIED_CONFIG_ANNOTATION]
+        patch = merge(last_applied, definition, actual_dict)
+        if patch:
+            return resource.patch(body=dict_merge(patch, desired_annotation),
+                                  name=definition['metadata']['name'],
+                                  namespace=definition['metadata']['namespace'],
+                                  content_type='application/merge-patch+json')
+        else:
+            return actual
+    else:
+        return resource.patch(body=definition, name=definition['metadata']['name'], namespace=definition['metadata']['namespace'])
+
+
+# The patch is the difference from actual to desired without deletions, plus deletions
+# from last_applied to desired. To find it, we compute deletions, which are the deletions from
+# last_applied to desired, and delta, which is the difference from actual to desired without
+# deletions, and then apply delta to deletions as a patch, which should be strictly additive.
+def merge(last_applied, desired, actual):
+    deletions = get_deletions(last_applied, desired)
+    delta = get_delta(actual, desired)
+    return dict_merge(deletions, delta)
+
+
+# dict_merge taken from Ansible's module_utils.common.dict_transformations
+def dict_merge(a, b):
+    '''recursively merges dicts. not just simple a['key'] = b['key'], if
+    both a and b have a key whose value is a dict then dict_merge is called
+    on both values and the result stored in the returned dictionary.'''
+    if not isinstance(b, dict):
+        return b
+    result = deepcopy(a)
+    for k, v in b.items():
+        if k in result and isinstance(result[k], dict):
+                result[k] = dict_merge(result[k], v)
+        else:
+            result[k] = deepcopy(v)
+    return result
+
+
+def get_deletions(last_applied, desired):
+    patch = {}
+    for k, last_applied_value in last_applied.items():
+        desired_value = desired.get(k)
+        if desired_value is None:
+            patch[k] = None
+        elif type(last_applied_value) != type(desired_value):
+            patch[k] = desired_value
+        elif isinstance(last_applied_value, dict):
+            p = get_deletions(last_applied_value, desired_value)
+            if p:
+                patch[k] = p
+        elif last_applied_value != desired_value:
+            patch[k] = desired_value
+    return patch
+
+
+def get_delta(actual, desired):
+    patch = {}
+    for k, desired_value in desired.items():
+        actual_value = actual.get(k)
+        if actual_value is None:
+            patch[k] = desired_value
+        elif isinstance(desired_value, dict):
+            p = get_delta(actual_value, desired_value)
+            if p:
+                patch[k] = p
+    return patch
diff --git a/test/unit/test_apply.py b/test/unit/test_apply.py
@@ -0,0 +1,87 @@
+# Test ConfigMapHash and SecretHash equivalents
+# tests based on https://github.com/kubernetes/kubernetes/pull/49961
+
+from openshift.dynamic.apply import merge
+
+tests = [
+    dict(
+        last_applied = dict(
+            kind="ConfigMap",
+            metadata=dict(name="foo"),
+            data=dict(one="1", two="2")
+        ),
+        desired = dict(
+            kind="ConfigMap",
+            metadata=dict(name="foo"),
+            data=dict(one="1", two="2")
+        ),
+        expected = {}
+    ),
+    dict(
+        last_applied = dict(
+            kind="ConfigMap",
+            metadata=dict(name="foo"),
+            data=dict(one="1", two="2")
+        ),
+        desired = dict(
+            kind="ConfigMap",
+            metadata=dict(name="foo"),
+            data=dict(one="1", two="2", three="3")
+        ),
+        expected = dict(data=dict(three="3"))
+    ),
+    dict(
+        last_applied = dict(
+            kind="ConfigMap",
+            metadata=dict(name="foo"),
+            data=dict(one="1", two="2")
+        ),
+        desired = dict(
+            kind="ConfigMap",
+            metadata=dict(name="foo"),
+            data=dict(one="1", three="3")
+        ),
+        expected = dict(data=dict(two=None, three="3"))
+    ),
+    dict(
+        last_applied = dict(
+            kind="Service",
+            metadata=dict(name="foo"),
+            spec=dict(ports=[dict(port=8080, name="http")])
+        ),
+        actual = dict(
+            kind="Service",
+            metadata=dict(name="foo"),
+            spec=dict(ports=[dict(port=8080, protocol='TCP', name="http")])
+        ),
+        desired = dict(
+            kind="Service",
+            metadata=dict(name="foo"),
+            spec=dict(ports=[dict(port=8080, name="http")])
+        ),
+        expected = {}
+    ),
+    dict(
+        last_applied = dict(
+            kind="Service",
+            metadata=dict(name="foo"),
+            spec=dict(ports=[dict(port=8080, name="http")])
+        ),
+        actual = dict(
+            kind="Service",
+            metadata=dict(name="foo"),
+            spec=dict(ports=[dict(port=8080, protocol='TCP', name="http")])
+        ),
+        desired = dict(
+            kind="Service",
+            metadata=dict(name="foo"),
+            spec=dict(ports=[dict(port=8081, name="http")])
+        ),
+        expected = dict(spec=dict(ports=[dict(port=8081, name="http")]))
+    ),
+]
+
+
+def test_merges():
+    for test in tests:
+        assert(merge(test['last_applied'], test['desired'], test.get('actual', test['last_applied'])) == test['expected'])