Skip to content

Commit

Permalink
northd: Support CIDR-based MAC binding aging threshold.
Browse files Browse the repository at this point in the history
Enhance MAC_Binding aging to allow CIDR-based threshold configurations.
This enables distinct threshold settings for different IP ranges,
applying the longest prefix matching for overlapping ranges.

A common use case involves setting a default threshold for all IPs, while
disabling aging for a specific range and potentially excluding a subset
within that range.

Signed-off-by: Han Zhou <hzhou@ovn.org>
Acked-by: Mark Michelson <mmichels@redhat.com>
  • Loading branch information
hzhou8 committed Dec 29, 2023
1 parent ef45415 commit 78851b6
Show file tree
Hide file tree
Showing 6 changed files with 409 additions and 21 deletions.
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Post v23.09.0
connection method and doesn't require additional probing.
external_ids:ovn-openflow-probe-interval configuration option for
ovn-controller no longer matters and is ignored.
- Support CIDR based MAC binding aging threshold. See ovn-nb(5) for
'mac_binding_age_threshold' for more details.

OVN v23.09.0 - 15 Sep 2023
--------------------------
Expand Down
291 changes: 276 additions & 15 deletions northd/aging.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,250 @@ aging_waker_schedule_next_wake(struct aging_waker *waker, int64_t next_wake_ms)
}
}

struct threshold_entry {
union {
ovs_be32 ipv4;
struct in6_addr ipv6;
} prefix;
bool is_v4;
unsigned int plen;
unsigned int threshold;
};

/* Contains CIDR-based aging threshold configuration parsed from
* "Logical_Router:options:mac_binding_age_threshold".
*
* This struct is also used for non-CIDR-based threshold, e.g. the ones from
* "NB_Global:other_config:fdb_age_threshold" for the common aging_context
* interface.
*
* - The arrays `v4_entries` and `v6_entries` are populated with parsed entries
* for IPv4 and IPv6 CIDRs, respectively, along with their associated
* thresholds. Entries within these arrays are sorted by prefix length,
* starting with the longest.
*
* - If a threshold is provided without an accompanying prefix, it's captured
* in `default_threshold`. In cases with multiple unprefixed thresholds,
* `default_threshold` will only store the last one. */
struct threshold_config {
struct threshold_entry *v4_entries;
size_t n_v4_entries;
struct threshold_entry *v6_entries;
size_t n_v6_entries;
unsigned int default_threshold;
};

static int
compare_entries_by_prefix_length(const void *a, const void *b)
{
const struct threshold_entry *entry_a = a;
const struct threshold_entry *entry_b = b;

return entry_b->plen - entry_a->plen;
}

/* Parse an ENTRY in the threshold option, with the format:
* [CIDR:]THRESHOLD
*
* Returns true if successful, false if failed. */
static bool
parse_threshold_entry(const char *str, struct threshold_entry *entry)
{
char *colon_ptr;
unsigned int value;
const char *threshold_str;

colon_ptr = strrchr(str, ':');
if (!colon_ptr) {
threshold_str = str;
entry->plen = 0;
} else {
threshold_str = colon_ptr + 1;
}

if (!str_to_uint(threshold_str, 10, &value)) {
return false;
}
entry->threshold = value;

if (!colon_ptr) {
return true;
}

/* ":" was found, so parse the string before ":" as a cidr. */
char ip_cidr[128];
ovs_strzcpy(ip_cidr, str, MIN(colon_ptr - str + 1, sizeof ip_cidr));
char *error = ip_parse_cidr(ip_cidr, &entry->prefix.ipv4, &entry->plen);
if (!error) {
entry->is_v4 = true;
return true;
}
free(error);
error = ipv6_parse_cidr(ip_cidr, &entry->prefix.ipv6, &entry->plen);
if (!error) {
entry->is_v4 = false;
return true;
}
free(error);
return false;
}

static void
threshold_config_destroy(struct threshold_config *config)
{
free(config->v4_entries);
free(config->v6_entries);
config->v4_entries = config->v6_entries = NULL;
config->n_v4_entries = config->n_v6_entries = 0;
config->default_threshold = 0;
}

/* Parse the threshold option string, which has the format:
* ENTRY[;ENTRY[...]]
*
* For the exact format of ENTRY, refer to the function
* `parse_threshold_entry`.
*
* The parsed data is populated to the struct threshold_config.
* See the comments of struct threshold_config for details.
*
* Return Values:
* - Returns `false` if the input does not match the expected format.
* Consequently, no entries will be populated.
* - Returns `true` upon successful parsing. The caller is responsible for
* releasing the allocated memory by calling threshold_config_destroy. */
static bool
parse_aging_threshold(const char *opt,
struct threshold_config *config)
{
if (!opt) {
return false;
}

static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
struct threshold_entry e;
char *token, *saveptr = NULL;
char *opt_copy = xstrdup(opt);
bool result = true;

memset(config, 0, sizeof *config);

for (token = strtok_r(opt_copy, ";", &saveptr); token != NULL;
token = strtok_r(NULL, ";", &saveptr)) {
if (!parse_threshold_entry(token, &e)) {
VLOG_WARN_RL(&rl, "Parsing aging threshold '%s' failed.", token);
result = false;
goto exit;
}

if (!e.plen) {
config->default_threshold = e.threshold;
} else if (e.is_v4) {
config->n_v4_entries++;
config->v4_entries = xrealloc(config->v4_entries,
config->n_v4_entries * sizeof e);
config->v4_entries[config->n_v4_entries - 1] = e;
} else {
config->n_v6_entries++;
config->v6_entries = xrealloc(config->v6_entries,
config->n_v6_entries * sizeof e);
config->v6_entries[config->n_v6_entries - 1] = e;
}
}

if (config->n_v4_entries > 0) {
qsort(config->v4_entries, config->n_v4_entries, sizeof e,
compare_entries_by_prefix_length);
}

if (config->n_v6_entries > 0) {
qsort(config->v6_entries, config->n_v6_entries, sizeof e,
compare_entries_by_prefix_length);
}

exit:
free(opt_copy);
if (!result) {
threshold_config_destroy(config);
}
return result;
}

static unsigned int
find_threshold_for_ip(const char *ip_str,
const struct threshold_config *config)
{
if (!ip_str) {
return config->default_threshold;
}

ovs_be32 ipv4;
struct in6_addr ipv6;
if (ip_parse(ip_str, &ipv4)) {
for (int i = 0; i < config->n_v4_entries; i++) {
ovs_be32 masked_ip = ipv4 &
be32_prefix_mask(config->v4_entries[i].plen);
if (masked_ip == config->v4_entries[i].prefix.ipv4) {
return config->v4_entries[i].threshold;
}
}
} else if (ipv6_parse(ip_str, &ipv6)) {
for (int i = 0; i < config->n_v6_entries; i++) {
struct in6_addr v6_mask =
ipv6_create_mask(config->v6_entries[i].plen);
struct in6_addr masked_ip = ipv6_addr_bitand(&ipv6, &v6_mask);
if (ipv6_addr_equals(&masked_ip,
&config->v6_entries[i].prefix.ipv6)) {
return config->v6_entries[i].threshold;
}
}
}
return config->default_threshold;
}

/* Parse the threshold option string (see the comment of the function
* parse_aging_threshold), and returns the smallest threshold. */
unsigned int
min_mac_binding_age_threshold(const char *opt)
{
struct threshold_config config;
if (!parse_aging_threshold(opt, &config)) {
return 0;
}

unsigned int threshold = UINT_MAX;
unsigned int t;

for (int i = 0; i < config.n_v4_entries; i++) {
t = config.v4_entries[i].threshold;
if (t && t < threshold) {
threshold = t;
}
}

for (int i = 0; i < config.n_v6_entries; i++) {
t = config.v6_entries[i].threshold;
if (t && t < threshold) {
threshold = t;
}
}

t = config.default_threshold;
if (t && t < threshold) {
threshold = t;
}

threshold_config_destroy(&config);

return threshold == UINT_MAX ? 0 : threshold;
}

struct aging_context {
int64_t next_wake_ms;
int64_t time_wall_now;
uint32_t removal_limit;
uint32_t n_removed;
uint64_t threshold;
struct threshold_config *threshold;
};

static struct aging_context
Expand All @@ -63,13 +301,14 @@ aging_context_init(uint32_t removal_limit)
.time_wall_now = time_wall_msec(),
.removal_limit = removal_limit,
.n_removed = 0,
.threshold = 0,
.threshold = NULL,
};
return ctx;
}

static void
aging_context_set_threshold(struct aging_context *ctx, uint64_t threshold)
aging_context_set_threshold(struct aging_context *ctx,
struct threshold_config *threshold)
{
ctx->threshold = threshold;
}
Expand All @@ -81,19 +320,27 @@ aging_context_is_at_limit(struct aging_context *ctx)
}

static bool
aging_context_handle_timestamp(struct aging_context *ctx, int64_t timestamp)
aging_context_handle_timestamp(struct aging_context *ctx, int64_t timestamp,
const char *ip)
{
int64_t elapsed = ctx->time_wall_now - timestamp;
if (elapsed < 0) {
return false;
}

if (elapsed >= ctx->threshold) {
ovs_assert(ctx->threshold);
uint64_t threshold = 1000 * find_threshold_for_ip(ip, ctx->threshold);

if (!threshold) {
return false;
}

if (elapsed >= threshold) {
ctx->n_removed++;
return true;
}

ctx->next_wake_ms = MIN(ctx->next_wake_ms, (ctx->threshold - elapsed));
ctx->next_wake_ms = MIN(ctx->next_wake_ms, (threshold - elapsed));
return false;
}

Expand Down Expand Up @@ -121,13 +368,18 @@ mac_binding_aging_run_for_datapath(const struct sbrec_datapath_binding *dp,
return;
}

if (!ctx->threshold->n_v4_entries && !ctx->threshold->n_v6_entries
&& !ctx->threshold->default_threshold) {
return;
}

struct sbrec_mac_binding *mb_index_row =
sbrec_mac_binding_index_init_row(mb_by_datapath);
sbrec_mac_binding_index_set_datapath(mb_index_row, dp);

const struct sbrec_mac_binding *mb;
SBREC_MAC_BINDING_FOR_EACH_EQUAL (mb, mb_index_row, mb_by_datapath) {
if (aging_context_handle_timestamp(ctx, mb->timestamp)) {
if (aging_context_handle_timestamp(ctx, mb->timestamp, mb->ip)) {
sbrec_mac_binding_delete(mb);
if (aging_context_is_at_limit(ctx)) {
break;
Expand Down Expand Up @@ -166,13 +418,20 @@ en_mac_binding_aging_run(struct engine_node *node, void *data OVS_UNUSED)
continue;
}

uint64_t threshold = smap_get_uint(&od->nbr->options,
"mac_binding_age_threshold", 0);
aging_context_set_threshold(&ctx, threshold * 1000);
struct threshold_config threshold_config;
if (!parse_aging_threshold(smap_get(&od->nbr->options,
"mac_binding_age_threshold"),
&threshold_config)) {
return;
}

aging_context_set_threshold(&ctx, &threshold_config);

mac_binding_aging_run_for_datapath(od->sb,
sbrec_mac_binding_by_datapath,
&ctx);
threshold_config_destroy(&threshold_config);

if (aging_context_is_at_limit(&ctx)) {
/* Schedule the next run after specified delay. */
ctx.next_wake_ms = AGING_BULK_REMOVAL_DELAY_MSEC;
Expand Down Expand Up @@ -255,7 +514,7 @@ fdb_run_for_datapath(const struct sbrec_datapath_binding *dp,

const struct sbrec_fdb *fdb;
SBREC_FDB_FOR_EACH_EQUAL (fdb, fdb_index_row, fdb_by_dp_key) {
if (aging_context_handle_timestamp(ctx, fdb->timestamp)) {
if (aging_context_handle_timestamp(ctx, fdb->timestamp, NULL)) {
sbrec_fdb_delete(fdb);
if (aging_context_is_at_limit(ctx)) {
break;
Expand Down Expand Up @@ -293,11 +552,13 @@ en_fdb_aging_run(struct engine_node *node, void *data OVS_UNUSED)
continue;
}

uint64_t threshold =
smap_get_uint(&od->nbs->other_config, "fdb_age_threshold", 0);
aging_context_set_threshold(&ctx, threshold * 1000);

struct threshold_config threshold_config;
memset(&threshold_config, 0, sizeof threshold_config);
threshold_config.default_threshold =
smap_get_uint(&od->nbs->other_config, "fdb_age_threshold", 0);
aging_context_set_threshold(&ctx, &threshold_config);
fdb_run_for_datapath(od->sb, sbrec_fdb_by_dp_key, &ctx);

if (aging_context_is_at_limit(&ctx)) {
/* Schedule the next run after specified delay. */
ctx.next_wake_ms = AGING_BULK_REMOVAL_DELAY_MSEC;
Expand Down
3 changes: 3 additions & 0 deletions northd/aging.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ void *en_mac_binding_aging_waker_init(struct engine_node *node,
struct engine_arg *arg);
void en_mac_binding_aging_waker_cleanup(void *data);

/* The MAC binding aging helper function. */
unsigned int min_mac_binding_age_threshold(const char *opt);

/* The FDB aging node functions. */
void en_fdb_aging_run(struct engine_node *node, void *data);
void *en_fdb_aging_init(struct engine_node *node, struct engine_arg *arg);
Expand Down

0 comments on commit 78851b6

Please sign in to comment.