lflow: Fix conjunction ID allocation problem with DP groups.
When DP groups are enabled, an lflow attached to a DP group is
processed against each of the DPs in the group one by one. The current
conjunction ID allocation algorithm didn't take the DP into account, so
when an lflow that generates conjunctions is processed, it tries to
allocate conjunction IDs repeatedly, once for each DP. The function
lflow_conj_ids_alloc has a protection against leaking IDs: it frees the
existing IDs before allocating again for the same lflow. Because of
this, the coverage counter 'lflow_conj_free_unexpected' increases.

In most cases this is not a problem: each allocation for the same
lflow would most likely get the same ID and use it for different OVS
flows with different DP metadata matches, without conflict.

However, there is a chance (although extremely small) that this can
lead to double allocation, when different DPs have different n_conjs
for the same lflow. For example (a standalone model of this sequence
is sketched after the list):

- 2 DPs (DP1 and DP2) in a DPG used by lflow A, which uses port-group
  PG1, in the following form:
    inport == @pg1 && (tcp.src == {1, 2, 3} || tcp.dst == {4, 5, 6})

  This can generate flows with n_conjs of 1 or 2, depending on how many
  ports PG1 evaluates to. If PG1 evaluates to no ports, there is no
  match (no flows). If PG1 evaluates to exactly 1 port, there is one
  conjunction (n_conjs == 1) due to the '||'. If PG1 evaluates to 2 or
  more ports, there are 2 conjunctions (n_conjs == 2).

- Assume PG1 is divided into SB port-groups PG1-DP1, PG1-DP2. PG1-DP1
  has 1 port, PG1-DP2 has 2 ports.

- Now the lflow is parsed against DP1 first, which ends up with
  n_conjs = 1, and allocates a single conj_id (say, ID_1).

- The lflow is then parsed against DP2, which ends up with n_conjs = 2.
  It frees the existing ID_1 allocated for the lflow and reallocates 2
  conjunction IDs. The algorithm tries to allocate ID_1 and (ID_1 + 1).
  However, assume ID_1 + 1 happens to have been allocated already by
  another lflow (lflow-2) whose uuid prefix is, unfortunately, ID_1 + 1,
  so the algorithm moves on to the next 2 contiguous available IDs and
  allocates them for the lflow for DP2.

- At this moment, the ID_1 allocated for the lflow against DP1 is still
  in use by installed OVS flows, but it is no longer tracked in the
  conj_ids table (it has already been freed). Now another lflow
  (lflow-3) comes along whose uuid prefix happens to be ID_1 (very
  unlikely, but still possible), and it requires a conjunction ID, so
  ID_1 is allocated to lflow-3.

- Now ID_1 is used by OVS flows of both lflow A (for DP1) and lflow-3:
  a double allocation. Packets hitting these flows would behave
  unpredictably.
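
The collision can be reproduced with a small, self-contained model of
the pre-fix allocator. Everything below (the tiny ID space, the
model_alloc() helper, the hard-coded prefixes) is invented purely for
illustration; the real allocator in controller/lflow-conj-ids.c works
over the full uint32_t space, but its first-fit-from-prefix search and
free-before-reallocate behavior are what this sketch models:

    /* Hypothetical model of the pre-fix allocator, for illustration only. */
    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define ID_SPACE 16
    static bool in_use[ID_SPACE];    /* Allocated conjunction IDs. */
    static int start[4], count[4];   /* Per-lflow record: start ID, n_conjs. */

    /* Pre-fix behavior: the key is the lflow prefix only, and any previous
     * allocation for the same lflow is freed first, no matter which DP it
     * was serving. */
    static int
    model_alloc(int lflow, int prefix, int n_conjs)
    {
        for (int i = 0; i < count[lflow]; i++) {
            in_use[start[lflow] + i] = false;  /* The "unexpected" free. */
        }
        count[lflow] = 0;

        for (int s = prefix; s + n_conjs <= ID_SPACE; s++) {
            bool range_free = true;
            for (int i = 0; i < n_conjs; i++) {
                if (in_use[s + i]) {
                    range_free = false;
                    break;
                }
            }
            if (range_free) {
                for (int i = 0; i < n_conjs; i++) {
                    in_use[s + i] = true;
                }
                start[lflow] = s;
                count[lflow] = n_conjs;
                return s;
            }
        }
        return 0;                             /* Exhausted (not hit here). */
    }

    int
    main(void)
    {
        int id_1 = model_alloc(0, 5, 1);  /* lflow A vs DP1: n_conjs == 1.  */
        model_alloc(1, 6, 1);             /* lflow-2, prefix == ID_1 + 1.   */
        model_alloc(0, 5, 2);             /* lflow A vs DP2: frees ID_1 and */
                                          /* lands on the next free pair.   */
        int id_3 = model_alloc(2, 5, 1);  /* lflow-3, prefix == ID_1.       */
        assert(id_1 == id_3);             /* ID_1 handed out twice.         */
        printf("double allocation: %d == %d\n", id_1, id_3);
        return 0;
    }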

The fix is rather straightforward: take the DP uuid as part of the
allocation key, together with the lflow uuid, and change the hash
algorithm to consider both uuids. The bigger change is on the free
path: when an lflow is deleted or reprocessed, the IDs must be freed
for all of its DPs, so the patch introduces another hmap, indexed by
lflow uuid, that points to the list of per-DP nodes.
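
Taken, lightly condensed, from the diff below: the allocation key now
hashes both uuids, and freeing an lflow walks every per-DP allocation
through the new lflow_to_dps index.

    /* The allocation key now covers both uuids (controller/lflow-conj-ids.c). */
    static inline uint32_t
    hash_lflow_dp(const struct uuid *lflow_uuid, const struct uuid *dp_uuid)
    {
        if (test_mode) {
            return lflow_uuid->parts[0];   /* Unit tests only, see below. */
        }
        return hash_int(uuid_hash(lflow_uuid), uuid_hash(dp_uuid));
    }

    /* Freeing an lflow frees all of its per-DP allocations via the new
     * lflow_to_dps index. */
    void
    lflow_conj_ids_free(struct conj_ids *conj_ids, const struct uuid *lflow_uuid)
    {
        struct lflow_to_dps_node *ltd = lflow_to_dps_find(conj_ids, lflow_uuid);
        if (!ltd) {
            return;
        }
        struct lflow_conj_node *lflow_conj, *next;
        LIST_FOR_EACH_SAFE (lflow_conj, next, list_node, &ltd->dps) {
            lflow_conj_ids_free_(conj_ids, lflow_conj);
        }
        hmap_remove(&conj_ids->lflow_to_dps, &ltd->hmap_node);
        free(ltd);
    }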

For unit tests, the algorithm itself didn't change much apart from the
hashing, so for convenience the patch introduces a test_mode flag that
keeps using the lflow uuid prefix as the key, for unit tests only.
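
As the diff to controller/test-lflow-conj-ids.c below shows, the test
harness just flips this flag once during setup and passes a zero DP
uuid to every allocation call:

    struct uuid dp_uuid = UUID_ZERO;
    lflow_conj_ids_init(&conj_ids);
    lflow_conj_ids_set_test_mode(true);  /* IDs stay == lflow uuid prefix. */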

The patch also adds a check in the PG test to make sure the counter
lflow_conj_free_unexpected is always 0. The updated test would fail
without this patch.

Fixes: b609aee ("lflow: Consistent conjunction id generation.")
Signed-off-by: Han Zhou <hzhou@ovn.org>
Acked-by: Mark Michelson <mmichels@redhat.com>
hzhou8 committed Feb 22, 2022
1 parent ed81be7 commit 97a502b
Showing 6 changed files with 175 additions and 57 deletions.
184 changes: 144 additions & 40 deletions controller/lflow-conj-ids.c
@@ -17,6 +17,8 @@
#include "coverage.h"
#include "lflow-conj-ids.h"
#include "util.h"
#include "hash.h"
#include "openvswitch/list.h"

COVERAGE_DEFINE(lflow_conj_conflict);
COVERAGE_DEFINE(lflow_conj_alloc);
@@ -30,33 +32,65 @@ struct conj_id_node {
uint32_t conj_id;
};

/* Node in struct conj_ids.lflow_conj_ids. */
struct lflow_conj_node {
struct hmap_node hmap_node;
struct hmap_node hmap_node; /* Node in struct conj_ids.lflow_conj_ids. */
struct ovs_list list_node; /* Node in struct lflow_to_dps_node.dps. */
struct uuid lflow_uuid;
struct uuid dp_uuid;
uint32_t start_conj_id;
uint32_t n_conjs;
};

struct lflow_to_dps_node {
struct hmap_node hmap_node; /* Node in struct conj_ids.lflow_to_dps. */
struct uuid lflow_uuid;
struct ovs_list dps; /* List of DPs the lflow belongs to. Each node is
struct lflow_conj_node.list_node. */
};

/* XXX: Figure out a way to avoid test_mode. */
static bool test_mode = false;

static void lflow_conj_ids_insert_(struct conj_ids *,
const struct uuid *lflow_uuid,
const struct uuid *dp_uuid,
uint32_t start_conj_id, uint32_t n_conjs);
static void lflow_conj_ids_free_(struct conj_ids *,
const struct uuid *lflow_uuid, bool expected);
static void lflow_conj_ids_free_(struct conj_ids *, struct lflow_conj_node *);
static void lflow_conj_ids_free_for_lflow_dp(struct conj_ids *,
const struct uuid *lflow_uuid,
const struct uuid *dp_uuid);
static struct lflow_to_dps_node *lflow_to_dps_find(struct conj_ids *,
const struct uuid *);
static inline uint32_t
hash_lflow_dp(const struct uuid *lflow_uuid, const struct uuid *dp_uuid)
{
if (test_mode) {
return lflow_uuid->parts[0];
}

return hash_int(uuid_hash(lflow_uuid), uuid_hash(dp_uuid));
}

/* For test purpose only. */
void
lflow_conj_ids_set_test_mode(bool mode)
{
test_mode = mode;
}

/* Allocate n_conjs continuous conjuction ids from the conj_ids for the given
* lflow_uuid. (0 is never included in an allocated range)
* lflow_uuid and dp_uuid. (0 is never included in an allocated range)
*
* The first conjunction id is returned. If no conjunction ids available, or if
* the input is invalid (n_conjs == 0), then 0 is returned.
*
* The algorithm tries to allocate the parts[0] of the input uuid as the first
* conjunction id. If it is unavailable, or any of the subsequent n_conjs - 1
* ids are unavailable, iterate until the next available n_conjs ids are found.
* Given that n_conjs is very small (in most cases will be 1), the algorithm
* should be efficient enough and in most cases just return the lflow_uuid's
* part[0], which ensures conjunction ids are consistent for the same logical
* flow in most cases.
* The algorithm tries to allocate the hash result of the combination of the
* lflow_uuid and dp_uuid as the first conjunction id. If it is unavailable, or
* any of the subsequent n_conjs - 1 ids are unavailable, iterate until the
* next available n_conjs ids are found. Given that n_conjs is very small (in
* most cases will be 1), the algorithm should be efficient enough and in most
* cases just return the hash value, which ensures conjunction ids are
* consistent for the same logical flow + DP in most cases.
*
* The performance will degrade if most of the uint32_t are allocated because
* conflicts will happen a lot. In practice this is not expected to happen in
Expand All @@ -66,16 +100,19 @@ static void lflow_conj_ids_free_(struct conj_ids *,
* smaller than this. */
uint32_t
lflow_conj_ids_alloc(struct conj_ids *conj_ids, const struct uuid *lflow_uuid,
uint32_t n_conjs)
const struct uuid *dp_uuid, uint32_t n_conjs)
{
if (!n_conjs) {
return 0;
}
lflow_conj_ids_free_(conj_ids, lflow_uuid, false);
lflow_conj_ids_free_for_lflow_dp(conj_ids, lflow_uuid, dp_uuid);

COVERAGE_INC(lflow_conj_alloc);

uint32_t start_conj_id = lflow_uuid->parts[0];
uint32_t start_conj_id = hash_lflow_dp(lflow_uuid, dp_uuid);
if (start_conj_id == 0) {
start_conj_id++;
}
uint32_t initial_id = start_conj_id;
bool initial = true;
while (true) {
@@ -118,7 +155,8 @@ lflow_conj_ids_alloc(struct conj_ids *conj_ids, const struct uuid *lflow_uuid,
}
start_conj_id = conj_id + 1;
}
lflow_conj_ids_insert_(conj_ids, lflow_uuid, start_conj_id, n_conjs);
lflow_conj_ids_insert_(conj_ids, lflow_uuid, dp_uuid, start_conj_id,
n_conjs);
return start_conj_id;
}

@@ -130,12 +168,13 @@ lflow_conj_ids_alloc(struct conj_ids *conj_ids, const struct uuid *lflow_uuid,
bool
lflow_conj_ids_alloc_specified(struct conj_ids *conj_ids,
const struct uuid *lflow_uuid,
const struct uuid *dp_uuid,
uint32_t start_conj_id, uint32_t n_conjs)
{
if (!n_conjs) {
return false;
}
lflow_conj_ids_free_(conj_ids, lflow_uuid, false);
lflow_conj_ids_free_for_lflow_dp(conj_ids, lflow_uuid, dp_uuid);

uint32_t conj_id = start_conj_id;
for (uint32_t i = 0; i < n_conjs; i++) {
@@ -151,7 +190,8 @@ lflow_conj_ids_alloc_specified(struct conj_ids *conj_ids,
}
conj_id++;
}
lflow_conj_ids_insert_(conj_ids, lflow_uuid, start_conj_id, n_conjs);
lflow_conj_ids_insert_(conj_ids, lflow_uuid, dp_uuid, start_conj_id,
n_conjs);
COVERAGE_INC(lflow_conj_alloc_specified);

return true;
@@ -161,14 +201,24 @@ lflow_conj_ids_alloc_specified(struct conj_ids *conj_ids,
void
lflow_conj_ids_free(struct conj_ids *conj_ids, const struct uuid *lflow_uuid)
{
lflow_conj_ids_free_(conj_ids, lflow_uuid, true);
struct lflow_to_dps_node *ltd = lflow_to_dps_find(conj_ids, lflow_uuid);
if (!ltd) {
return;
}
struct lflow_conj_node *lflow_conj, *next;
LIST_FOR_EACH_SAFE (lflow_conj, next, list_node, &ltd->dps) {
lflow_conj_ids_free_(conj_ids, lflow_conj);
}
hmap_remove(&conj_ids->lflow_to_dps, &ltd->hmap_node);
free(ltd);
}

void
lflow_conj_ids_init(struct conj_ids *conj_ids)
{
hmap_init(&conj_ids->conj_id_allocations);
hmap_init(&conj_ids->lflow_conj_ids);
hmap_init(&conj_ids->lflow_to_dps);
}

void
Expand All @@ -185,9 +235,18 @@ lflow_conj_ids_destroy(struct conj_ids *conj_ids) {
HMAP_FOR_EACH_SAFE (lflow_conj, l_c_next, hmap_node,
&conj_ids->lflow_conj_ids) {
hmap_remove(&conj_ids->lflow_conj_ids, &lflow_conj->hmap_node);
ovs_list_remove(&lflow_conj->list_node);
free(lflow_conj);
}
hmap_destroy(&conj_ids->lflow_conj_ids);

struct lflow_to_dps_node *ltd, *ltd_next;
HMAP_FOR_EACH_SAFE (ltd, ltd_next, hmap_node,
&conj_ids->lflow_to_dps) {
hmap_remove(&conj_ids->lflow_to_dps, &ltd->hmap_node);
free(ltd);
}
hmap_destroy(&conj_ids->lflow_to_dps);
}

void lflow_conj_ids_clear(struct conj_ids *conj_ids) {
Expand All @@ -204,10 +263,11 @@ lflow_conj_ids_dump(struct conj_ids *conj_ids, struct ds *out_data)
ds_put_cstr(out_data, "Conjunction IDs allocations:\n");
HMAP_FOR_EACH (lflow_conj, hmap_node, &conj_ids->lflow_conj_ids) {
bool has_conflict =
(lflow_conj->start_conj_id != lflow_conj->lflow_uuid.parts[0]);
ds_put_format(out_data, "lflow: "UUID_FMT", start: %"PRIu32
", n: %"PRIu32"%s\n",
(lflow_conj->start_conj_id != lflow_conj->hmap_node.hash);
ds_put_format(out_data, "lflow: "UUID_FMT", dp: "UUID_FMT", start: %"
PRIu32", n: %"PRIu32"%s\n",
UUID_ARGS(&lflow_conj->lflow_uuid),
UUID_ARGS(&lflow_conj->dp_uuid),
lflow_conj->start_conj_id,
lflow_conj->n_conjs,
has_conflict ? " (*)" : "");
@@ -224,11 +284,25 @@ lflow_conj_ids_dump(struct conj_ids *conj_ids, struct ds *out_data)
}
}

static struct lflow_to_dps_node *
lflow_to_dps_find(struct conj_ids *conj_ids, const struct uuid *lflow_uuid)
{
struct lflow_to_dps_node *ltd;
HMAP_FOR_EACH_WITH_HASH (ltd, hmap_node, uuid_hash(lflow_uuid),
&conj_ids->lflow_to_dps) {
if (uuid_equals(&ltd->lflow_uuid, lflow_uuid)) {
return ltd;
}
}
return NULL;
}

/* Insert n_conjs conjuntion ids starting from start_conj_id into the conj_ids,
* assuming the ids are confirmed to be available. */
static void
lflow_conj_ids_insert_(struct conj_ids *conj_ids,
const struct uuid *lflow_uuid,
const struct uuid *dp_uuid,
uint32_t start_conj_id, uint32_t n_conjs)
{
ovs_assert(n_conjs);
@@ -243,36 +317,46 @@ lflow_conj_ids_insert_(struct conj_ids *conj_ids,

struct lflow_conj_node *lflow_conj = xzalloc(sizeof *lflow_conj);
lflow_conj->lflow_uuid = *lflow_uuid;
lflow_conj->dp_uuid = *dp_uuid;
lflow_conj->start_conj_id = start_conj_id;
lflow_conj->n_conjs = n_conjs;
hmap_insert(&conj_ids->lflow_conj_ids, &lflow_conj->hmap_node,
uuid_hash(lflow_uuid));
hash_lflow_dp(lflow_uuid, dp_uuid));

struct lflow_to_dps_node *ltd = lflow_to_dps_find(conj_ids, lflow_uuid);
if (!ltd) {
ltd = xmalloc(sizeof *ltd);
ltd->lflow_uuid = *lflow_uuid;
ovs_list_init(&ltd->dps);
hmap_insert(&conj_ids->lflow_to_dps, &ltd->hmap_node,
uuid_hash(lflow_uuid));
}
ovs_list_insert(&ltd->dps, &lflow_conj->list_node);
}

static void
lflow_conj_ids_free_(struct conj_ids *conj_ids, const struct uuid *lflow_uuid,
bool expected)
static struct lflow_conj_node *
lflow_conj_ids_find_(struct conj_ids *conj_ids,
const struct uuid *lflow_uuid,
const struct uuid *dp_uuid)
{
struct lflow_conj_node *lflow_conj;
bool found = false;
HMAP_FOR_EACH_WITH_HASH (lflow_conj, hmap_node, uuid_hash(lflow_uuid),
HMAP_FOR_EACH_WITH_HASH (lflow_conj, hmap_node,
hash_lflow_dp(lflow_uuid, dp_uuid),
&conj_ids->lflow_conj_ids) {
if (uuid_equals(&lflow_conj->lflow_uuid, lflow_uuid)) {
found = true;
break;
if (uuid_equals(&lflow_conj->lflow_uuid, lflow_uuid) &&
uuid_equals(&lflow_conj->dp_uuid, dp_uuid)) {
return lflow_conj;
}
}
if (!found) {
return;
}
return NULL;
}

static void
lflow_conj_ids_free_(struct conj_ids *conj_ids,
struct lflow_conj_node *lflow_conj)
{
ovs_assert(lflow_conj->n_conjs);
COVERAGE_INC(lflow_conj_free);
if (!expected) {
/* It is unexpected that an entry is found when calling from
* alloc/alloc_specified. Something may be wrong in the lflow module.
*/
COVERAGE_INC(lflow_conj_free_unexpected);
}
uint32_t conj_id = lflow_conj->start_conj_id;
for (uint32_t i = 0; i < lflow_conj->n_conjs; i++) {
ovs_assert(conj_id);
Expand All @@ -290,5 +374,25 @@ lflow_conj_ids_free_(struct conj_ids *conj_ids, const struct uuid *lflow_uuid,
}

hmap_remove(&conj_ids->lflow_conj_ids, &lflow_conj->hmap_node);
ovs_list_remove(&lflow_conj->list_node);
free(lflow_conj);
}

static void
lflow_conj_ids_free_for_lflow_dp(struct conj_ids *conj_ids,
const struct uuid *lflow_uuid,
const struct uuid *dp_uuid)
{
struct lflow_conj_node *lflow_conj = lflow_conj_ids_find_(conj_ids,
lflow_uuid,
dp_uuid);
if (!lflow_conj) {
return;
}

/* It is unexpected that an entry is found because this is called only by
* alloc/alloc_specified. Something may be wrong in the lflow module.
*/
COVERAGE_INC(lflow_conj_free_unexpected);
lflow_conj_ids_free_(conj_ids, lflow_conj);
}
9 changes: 7 additions & 2 deletions controller/lflow-conj-ids.h
@@ -24,20 +24,25 @@
struct conj_ids {
/* Allocated conjunction ids. Contains struct conj_id_node. */
struct hmap conj_id_allocations;
/* A map from lflows to the conjunction ids used. Contains struct
/* A map from lflow + DP to the conjunction ids used. Contains struct
* lflow_conj_node. */
struct hmap lflow_conj_ids;
/* A map from lflow to the list of DPs this lflow belongs to. Contains
* struct lflow_to_dps_node. */
struct hmap lflow_to_dps;
};

uint32_t lflow_conj_ids_alloc(struct conj_ids *, const struct uuid *lflow_uuid,
uint32_t n_conjs);
const struct uuid *dp_uuid, uint32_t n_conjs);
bool lflow_conj_ids_alloc_specified(struct conj_ids *,
const struct uuid *lflow_uuid,
const struct uuid *dp_uuid,
uint32_t start_conj_id, uint32_t n_conjs);
void lflow_conj_ids_free(struct conj_ids *, const struct uuid *lflow_uuid);
void lflow_conj_ids_init(struct conj_ids *);
void lflow_conj_ids_destroy(struct conj_ids *);
void lflow_conj_ids_clear(struct conj_ids *);
void lflow_conj_ids_dump(struct conj_ids *, struct ds *out_data);
void lflow_conj_ids_set_test_mode(bool);

#endif /* controller/lflow-conj-ids.h */
2 changes: 2 additions & 0 deletions controller/lflow.c
@@ -892,6 +892,7 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow,
&& lcv->n_conjs
&& !lflow_conj_ids_alloc_specified(l_ctx_out->conj_ids,
&lflow->header_.uuid,
&dp->header_.uuid,
lcv->conj_id_ofs, lcv->n_conjs)) {
/* This should happen very rarely. */
VLOG_DBG("lflow "UUID_FMT" match cached with conjunctions, but the"
@@ -955,6 +956,7 @@ consider_logical_flow__(const struct sbrec_logical_flow *lflow,
if (n_conjs) {
start_conj_id = lflow_conj_ids_alloc(l_ctx_out->conj_ids,
&lflow->header_.uuid,
&dp->header_.uuid,
n_conjs);
if (!start_conj_id) {
VLOG_ERR("32-bit conjunction ids exhausted!");
7 changes: 5 additions & 2 deletions controller/test-lflow-conj-ids.c
@@ -44,7 +44,9 @@ test_conj_ids_operations(struct ovs_cmdl_context *ctx)
unsigned int shift = 1;
unsigned int n_ops;
struct conj_ids conj_ids;
struct uuid dp_uuid = UUID_ZERO;
lflow_conj_ids_init(&conj_ids);
lflow_conj_ids_set_test_mode(true);

if (!test_read_uint_value(ctx, shift++, "n_ops", &n_ops)) {
goto done;
@@ -69,7 +71,7 @@ test_conj_ids_operations(struct ovs_cmdl_context *ctx)
}

uint32_t start_conj_id = lflow_conj_ids_alloc(&conj_ids, &uuid,
n_conjs);
&dp_uuid, n_conjs);
printf("alloc("UUID_FMT", %"PRIu32"): 0x%"PRIx32"\n",
UUID_ARGS(&uuid), n_conjs, start_conj_id);
} else if (!strcmp(op, "alloc-specified")) {
@@ -90,7 +92,8 @@ test_conj_ids_operations(struct ovs_cmdl_context *ctx)
}

bool ret = lflow_conj_ids_alloc_specified(&conj_ids, &uuid,
start_conj_id, n_conjs);
&dp_uuid, start_conj_id,
n_conjs);
printf("alloc_specified("UUID_FMT", 0x%"PRIx32", %"PRIu32"): %s\n",
UUID_ARGS(&uuid), start_conj_id, n_conjs,
ret ? "true" : "false");
