Skip to content

Commit

Permalink
ovn-controller: Handle addresses deletion in address set incrementally.
Browse files Browse the repository at this point in the history
The cost of reprocessing a lflow referencing a big address set can be very
high. Now a single address change in an address set would cause the
related logical flows being reprocessed. When the change rate of an
address set is high, ovn-controller would be busy reprocessing lflows.

This patch handles addresses deletion incrementally. It deletes the
related flows for the deleted addresses only, without deleting
and recreating unrelated flows unnecessarily.

Scale test shows obvious performance gains because the time complexity
changed from O(n) to O(1). The bigger the size of address set, the more
CPU savings. With the AS size of 10k, the test shows ~40x speed up.

Test setup:
CPU: Intel(R) Core(TM) i9-7920X CPU @ 2.90GHz.
5 ACL all referencing an address set of 10,000 IPs.

Measure the time spent by ovn-controller for handling one IP deletion
from the address set.

Before: ~400ms
After: 11-12ms

There is memory cost increase, due to the index built to track each
individual IP. The total memory cost for the OF flows in ovn-controller
increased ~20% in the 10k AS size test.

Before:
ofctrl_desired_flow_usage-KB:22248
ofctrl_installed_flow_usage-KB:14850
ofctrl_sb_flow_ref_usage-KB:7208

After:
ofctrl_desired_flow_usage-KB:22248
ofctrl_installed_flow_usage-KB:14850
ofctrl_sb_flow_ref_usage-KB:15551

Signed-off-by: Han Zhou <hzhou@ovn.org>
Acked-by: Mark Michelson <mmichels@redhat.com>
Acked-by: Numan Siddique <numans@ovn.org>
  • Loading branch information
hzhou8 committed Feb 24, 2022
1 parent aa3e4e8 commit 6a60154
Show file tree
Hide file tree
Showing 6 changed files with 265 additions and 15 deletions.
149 changes: 149 additions & 0 deletions controller/lflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,155 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in,
return ret;
}

static bool
as_info_from_expr_const(const char *as_name, const union expr_constant *c,
struct addrset_info *as_info)
{
as_info->name = as_name;
as_info->ip = c->value.ipv6;
if (c->masked) {
as_info->mask = c->mask.ipv6;
} else {
/* Generate mask so that it is the same as what's added for
* expr->cmp.mask. See make_cmp__() in expr.c. */
union mf_subvalue mask;
memset(&mask, 0, sizeof mask);
if (c->format == LEX_F_IPV4) {
mask.ipv4 = be32_prefix_mask(32);
} else if (c->format == LEX_F_IPV6) {
mask.ipv6 = ipv6_create_mask(128);
} else if (c->format == LEX_F_ETHERNET) {
mask.mac = eth_addr_exact;
} else {
/* Not an address */
return false;
}
as_info->mask = mask.ipv6;
}
return true;
}

static bool
as_update_can_be_handled(const char *as_name, struct addr_set_diff *as_diff,
struct lflow_ctx_in *l_ctx_in)
{
struct expr_constant_set *as = shash_find_data(l_ctx_in->addr_sets,
as_name);
ovs_assert(as);
size_t n_added = as_diff->added ? as_diff->added->n_values : 0;
size_t n_deleted = as_diff->deleted ? as_diff->deleted->n_values : 0;
size_t old_as_size = as->n_values + n_deleted - n_added;

/* If the change may impact n_conj, i.e. the template of the flows would
* change, we must reprocess the lflow. */
if (old_as_size <= 1 || as->n_values <= 1) {
return false;
}

/* If the size of the diff is too big, reprocessing may be more
* efficient than incrementally processing the diffs. */
if ((n_added + n_deleted) >= as->n_values) {
return false;
}

return true;
}

/* Handles address set update incrementally - processes only the diff
* (added/deleted) addresses in the address set. If it cannot handle the update
* incrementally, returns false, so that the caller will trigger reprocessing
* for the lflow.
*
* The reasons that the function returns false are:
*
* - The size of the address set changed to/from 0 or 1, which means the
* 'template' of the lflow translation is changed. In this case reprocessing
* doesn't impact performance because the size of the address set is already
* very small.
*
* - The size of the change is equal or bigger than the new size. In this case
* it doesn't make sense to incrementally processing the changes because
* reprocessing can be faster.
*
* - When the address set information couldn't be properly tracked during lflow
* parsing. The typical cases are:
*
* - The relational operator to the address set is not '=='. In this case
* there is no 1-1 mapping between the addresses and the flows
* generated.
*
* - The sub expression of the address set is combined with other sub-
* expressions/constants, usually because of disjunctions between
* sub-expressions/constants, e.g.:
*
* ip.src == $as1 || ip.dst == $as2
* ip.src == {$as1, $as2}
* ip.src == {$as1, ip1}
*
* All these could have been split into separate lflows.
*
* - Conjunctions overlapping between lflows, which can be caused by
* overlapping address sets or same address set used by multiple lflows
* that could have been combined. e.g.:
*
* lflow1: ip.src == $as1 && tcp.dst == {p1, p2}
* lflow2: ip.src == $as1 && tcp.dst == {p3, p4}
*
* It could have been combined as:
*
* ip.src == $as1 && tcp.dst == {p1, p2, p3, p4}
*/
bool
lflow_handle_addr_set_update(const char *as_name,
struct addr_set_diff *as_diff,
struct lflow_ctx_in *l_ctx_in OVS_UNUSED,
struct lflow_ctx_out *l_ctx_out,
bool *changed)
{
if (as_diff->added) {
return false;
}
ovs_assert(as_diff->deleted);

if (!as_update_can_be_handled(as_name, as_diff, l_ctx_in)) {
return false;
}

struct ref_lflow_node *rlfn =
ref_lflow_lookup(&l_ctx_out->lfrr->ref_lflow_table, REF_TYPE_ADDRSET,
as_name);
if (!rlfn) {
*changed = false;
return true;
}

*changed = false;

struct lflow_ref_list_node *lrln;
HMAP_FOR_EACH (lrln, hmap_node, &rlfn->lflow_uuids) {
if (lflows_processed_find(l_ctx_out->lflows_processed,
&lrln->lflow_uuid)) {
VLOG_DBG("lflow "UUID_FMT"has been processed, skip.",
UUID_ARGS(&lrln->lflow_uuid));
continue;
}
for (size_t i = 0; i < as_diff->deleted->n_values; i++) {
union expr_constant *c = &as_diff->deleted->values[i];
struct addrset_info as_info;
if (!as_info_from_expr_const(as_name, c, &as_info)) {
continue;
}
if (!ofctrl_remove_flows_for_as_ip(l_ctx_out->flow_table,
&lrln->lflow_uuid, &as_info,
lrln->ref_count)) {
return false;
}
*changed = true;
}
}
return true;
}

bool
lflow_handle_changed_ref(enum ref_type ref_type, const char *ref_name,
struct lflow_ctx_in *l_ctx_in,
Expand Down
10 changes: 10 additions & 0 deletions controller/lflow.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,16 @@ bool lflow_handle_changed_flows(struct lflow_ctx_in *, struct lflow_ctx_out *);
bool lflow_handle_changed_ref(enum ref_type, const char *ref_name,
struct lflow_ctx_in *, struct lflow_ctx_out *,
bool *changed);

struct addr_set_diff {
struct expr_constant_set *added;
struct expr_constant_set *deleted;
};
bool lflow_handle_addr_set_update(const char *as_name, struct addr_set_diff *,
struct lflow_ctx_in *,
struct lflow_ctx_out *,
bool *changed);

void lflow_handle_changed_neighbors(
struct ovsdb_idl_index *sbrec_port_binding_by_name,
const struct sbrec_mac_binding_table *,
Expand Down
71 changes: 71 additions & 0 deletions controller/ofctrl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1349,6 +1349,77 @@ ofctrl_flood_remove_flows(struct ovn_desired_flow_table *flow_table,
}
}

/* Remove desired flows related to the specified 'addrset_info' for the
* 'lflow_uuid'. Returns true if it can be processed completely, otherwise
* returns false, which would trigger a reprocessing of the lflow of
* 'lflow_uuid'. The expected_count is checked against the actual flows
* deleted, and if it doesn't match, return false, too. */
bool
ofctrl_remove_flows_for_as_ip(struct ovn_desired_flow_table *flow_table,
const struct uuid *lflow_uuid,
const struct addrset_info *as_info,
size_t expected_count)
{
struct sb_to_flow *stf = sb_to_flow_find(&flow_table->uuid_flow_table,
lflow_uuid);
if (!stf) {
/* No such flow, nothing needs to be done. */
return true;
}

struct sb_addrset_ref *sar;
bool found = false;
LIST_FOR_EACH (sar, list_node, &stf->addrsets) {
if (!strcmp(sar->name, as_info->name)) {
found = true;
break;
}
}
if (!found) {
/* No address set tracking infomation found, can't perform the
* deletion. */
return false;
}

struct as_ip_to_flow_node *itfn =
as_ip_to_flow_find(&sar->as_ip_to_flow_map, &as_info->ip,
&as_info->mask);
if (!itfn) {
/* This ip wasn't tracked, probably because it maps to a flow that has
* compound conjunction actions for the same ip from multiple address
* sets. */
return false;
}
struct sb_flow_ref *sfr, *next;
size_t count = 0;
LIST_FOR_EACH_SAFE (sfr, next, as_ip_flow_list, &itfn->flows) {
/* If the desired flow is referenced by multiple sb lflows, it
* shouldn't have been indexed by address set. */
ovs_assert(ovs_list_is_short(&sfr->sb_list));

ovs_list_remove(&sfr->sb_list);
ovs_list_remove(&sfr->flow_list);
ovs_list_remove(&sfr->as_ip_flow_list);

struct desired_flow *f = sfr->flow;
mem_stats.sb_flow_ref_usage -= sb_flow_ref_size(sfr);
free(sfr);

ovs_assert(ovs_list_is_empty(&f->list_node));
ovs_assert(ovs_list_is_empty(&f->references));
ovn_flow_log(&f->flow, "remove_flows_for_as_ip");
hmap_remove(&flow_table->match_flow_table,
&f->match_hmap_node);
track_or_destroy_for_flow_del(flow_table, f);
count++;
}

hmap_remove(&sar->as_ip_to_flow_map, &itfn->hmap_node);
mem_stats.sb_flow_ref_usage -= sizeof *itfn;
free(itfn);
return (count == expected_count);
}

/* Remove ovn_flows for the given "sb_to_flow" node in the uuid_flow_table.
* Optionally log the message for each flow that is acturally removed, if
* log_msg is not NULL. */
Expand Down
5 changes: 5 additions & 0 deletions controller/ofctrl.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ void ofctrl_flood_remove_flows(struct ovn_desired_flow_table *,
struct hmap *flood_remove_nodes);
void ofctrl_flood_remove_add_node(struct hmap *flood_remove_nodes,
const struct uuid *sb_uuid);
bool ofctrl_remove_flows_for_as_ip(struct ovn_desired_flow_table *,
const struct uuid *lflow_uuid,
const struct addrset_info *,
size_t expected_count);

void ovn_desired_flow_table_init(struct ovn_desired_flow_table *);
void ovn_desired_flow_table_clear(struct ovn_desired_flow_table *);
void ovn_desired_flow_table_destroy(struct ovn_desired_flow_table *);
Expand Down
25 changes: 17 additions & 8 deletions controller/ovn-controller.c
Original file line number Diff line number Diff line change
Expand Up @@ -1416,11 +1416,6 @@ en_addr_sets_init(struct engine_node *node OVS_UNUSED,
return as;
}

struct addr_set_diff {
struct expr_constant_set *added;
struct expr_constant_set *deleted;
};

static void
en_addr_sets_clear_tracked_data(void *data)
{
Expand Down Expand Up @@ -1492,6 +1487,14 @@ addr_sets_update(const struct sbrec_address_set_table *address_set_table,
expr_constant_set_integers_diff(cs_old, cs_new,
&as_diff->added,
&as_diff->deleted);
if (!as_diff->added && !as_diff->deleted) {
/* The address set may have been updated, but the change
* doesn't has any impact to the generated constant-set.
* For example, ff::01 is changed to ff::00:01. */
free(as_diff);
expr_constant_set_destroy(cs_new);
continue;
}
shash_add(updated, as->name, as_diff);
expr_const_sets_add(addr_sets, as->name, cs_new);
}
Expand Down Expand Up @@ -2549,9 +2552,15 @@ lflow_output_addr_sets_handler(struct engine_node *node, void *data)
}
struct shash_node *shash_node;
SHASH_FOR_EACH (shash_node, &as_data->updated) {
if (!lflow_handle_changed_ref(REF_TYPE_ADDRSET, shash_node->name,
&l_ctx_in, &l_ctx_out, &changed)) {
return false;
struct addr_set_diff *as_diff = shash_node->data;
if (!lflow_handle_addr_set_update(shash_node->name, as_diff, &l_ctx_in,
&l_ctx_out, &changed)) {
VLOG_DBG("Can't incrementally handle the change of address set %s."
" Reprocess related lflows.", shash_node->name);
if (!lflow_handle_changed_ref(REF_TYPE_ADDRSET, shash_node->name,
&l_ctx_in, &l_ctx_out, &changed)) {
return false;
}
}
if (changed) {
engine_set_node_state(node, EN_UPDATED);
Expand Down
20 changes: 13 additions & 7 deletions tests/ovn-controller.at
Original file line number Diff line number Diff line change
Expand Up @@ -946,7 +946,9 @@ priority=1100,ip,reg15=0x$port_key,metadata=0x$dp_key,nw_src=10.0.0.10 actions=d
done

reprocess_count_new=$(read_counter consider_logical_flow)
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
# when the old/new AS size is smaller than 2, fallback to reprocessing, so
# there are still 2 reprocessing when the AS size is below 2.
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
])

# Add IPs to as1 for 10 times, 2 IPs each time.
Expand Down Expand Up @@ -1135,7 +1137,7 @@ priority=1100,tcp,reg15=0x$port_key,metadata=0x$dp_key,nw_src=10.0.0.10,tp_dst=3
done

reprocess_count_new=$(read_counter consider_logical_flow)
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
])

# Add IPs to as1 for 10 times, 2 IPs each time.
Expand Down Expand Up @@ -1330,7 +1332,7 @@ priority=1100,ip,reg15=0x$port_key,metadata=0x$dp_key,nw_src=10.0.0.10,nw_dst=10
done

reprocess_count_new=$(read_counter consider_logical_flow)
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
])

# Add 1 IP back to both ASes
Expand Down Expand Up @@ -1386,7 +1388,7 @@ priority=1100,ip,reg15=0x$port_key,metadata=0x$dp_key,nw_src=10.0.0.10,nw_dst=10
done

reprocess_count_new=$(read_counter consider_logical_flow)
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
])

OVN_CLEANUP([hv1])
Expand Down Expand Up @@ -1480,6 +1482,8 @@ for i in $(seq 10); do
done

reprocess_count_new=$(read_counter consider_logical_flow)
# In this case the two sub expr for as1 and as2 are merged, so we lose track of
# address set information - can't handle deletion incrementally.
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
])

Expand Down Expand Up @@ -1580,6 +1584,8 @@ for i in $(seq 10); do
done

reprocess_count_new=$(read_counter consider_logical_flow)
# In this case the as1 and as2 are merged to a single OR expr, so we lose track of
# address set information - can't handle deletion incrementally.
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
])

Expand Down Expand Up @@ -1675,7 +1681,7 @@ priority=1100,ip,reg15=0x$port_key,metadata=0x$dp_key,nw_src=10.0.0.10,nw_dst=10
done

reprocess_count_new=$(read_counter consider_logical_flow)
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
])

# Add IPs to as1 for 10 times, 2 IPs each time.
Expand Down Expand Up @@ -1963,7 +1969,7 @@ priority=1100,reg15=0x$port_key,metadata=0x$dp_key,dl_src=aa:aa:aa:aa:aa:05 acti
done

reprocess_count_new=$(read_counter consider_logical_flow)
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [5
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
])

OVN_CLEANUP([hv1])
Expand Down Expand Up @@ -2043,7 +2049,7 @@ priority=1100,ipv6,reg15=0x$port_key,metadata=0x$dp_key,ipv6_src=ff::5 actions=d
done

reprocess_count_new=$(read_counter consider_logical_flow)
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [5
AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
])

OVN_CLEANUP([hv1])
Expand Down

0 comments on commit 6a60154

Please sign in to comment.