Skip to content

Commit

Permalink
Allow explicit setting of the SNAT zone on a gateway router.
Browse files Browse the repository at this point in the history
In certain situations, OVN may coexist with other applications on a
host. Traffic from OVN and the other applications may then go out a
shared gateway. If OVN traffic and the other application traffic use
different conntrack zones for SNAT, then it is possible for the shared
gateway to assign conflicting source IP:port combinations. By sharing
the same conntrack zone, there will be no conflicting assignments.

In this commit, we introduce options:snat-ct-zone for northbound logical
routers. By setting this option, users can explicitly set the conntrack
zone for the logical router so that it will match the zone used by
non-OVN traffic on the host.

The biggest side effects of this patch are:
1) southbound datapath changes now result in recalculating CT zones in
   ovn-controller. This can result in recomputing physical flows in more
   situations than previously.
2) The table 65 flow to transition between datapaths is no longer
   associated with a port binding. This is because the flow refers to
   the peer datapath's CT zones, which can now be updated due to changes
   on that datapath. The flow therefore may need to be updated either
   due to the port binding being changed or the peer datapath being
   changed.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1892311
Signed-off-by: Mark Michelson <mmichels@redhat.com>
Acked-by: Numan Siddique <numans@ovn.org>
  • Loading branch information
putnopvut committed Nov 17, 2020
1 parent e14b52a commit f9cab11
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 14 deletions.
86 changes: 73 additions & 13 deletions controller/ovn-controller.c
Expand Up @@ -531,6 +531,21 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl,
}
}

static void
add_pending_ct_zone_entry(struct shash *pending_ct_zones,
enum ct_zone_pending_state state,
int zone, bool add, const char *name)
{
VLOG_DBG("%s ct zone %"PRId32" for '%s'",
add ? "assigning" : "removing", zone, name);

struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
pending->state = state; /* Skip flushing zone. */
pending->zone = zone;
pending->add = add;
shash_add(pending_ct_zones, name, pending);
}

static void
update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
struct simap *ct_zones, unsigned long *ct_zone_bitmap,
Expand All @@ -540,6 +555,8 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
int scan_start = 1;
const char *user;
struct sset all_users = SSET_INITIALIZER(&all_users);
struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];

SSET_FOR_EACH(user, lports) {
sset_add(&all_users, user);
Expand All @@ -554,6 +571,11 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
char *snat = alloc_nat_zone_key(&ld->datapath->header_.uuid, "snat");
sset_add(&all_users, dnat);
sset_add(&all_users, snat);

int req_snat_zone = datapath_snat_ct_zone(ld->datapath);
if (req_snat_zone >= 0) {
simap_put(&req_snat_zones, snat, req_snat_zone);
}
free(dnat);
free(snat);
}
Expand All @@ -564,15 +586,56 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
VLOG_DBG("removing ct zone %"PRId32" for '%s'",
ct_zone->data, ct_zone->name);

struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
pending->state = CT_ZONE_DB_QUEUED; /* Skip flushing zone. */
pending->zone = ct_zone->data;
pending->add = false;
shash_add(pending_ct_zones, ct_zone->name, pending);
add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_DB_QUEUED,
ct_zone->data, false, ct_zone->name);

bitmap_set0(ct_zone_bitmap, ct_zone->data);
simap_delete(ct_zones, ct_zone);
} else if (!simap_find(&req_snat_zones, ct_zone->name)) {
bitmap_set1(unreq_snat_zones, ct_zone->data);
}
}

/* Prioritize requested CT zones */
struct simap_node *snat_req_node;
SIMAP_FOR_EACH (snat_req_node, &req_snat_zones) {
struct simap_node *node = simap_find(ct_zones, snat_req_node->name);
if (node) {
if (node->data == snat_req_node->data) {
/* No change to this request, so no action needed */
continue;
} else {
/* Zone request has changed for this node. delete old entry */
bitmap_set0(ct_zone_bitmap, node->data);
simap_delete(ct_zones, node);
}
}

/* Determine if someone already had this zone auto-assigned.
* If so, then they need to give up their assignment since
* that zone is being explicitly requested now.
*/
if (bitmap_is_set(unreq_snat_zones, snat_req_node->data)) {
struct simap_node *dup;
struct simap_node *next;
SIMAP_FOR_EACH_SAFE (dup, next, ct_zones) {
if (dup != snat_req_node && dup->data == snat_req_node->data) {
simap_delete(ct_zones, dup);
break;
}
}
/* Set this bit to 0 so that if multiple datapaths have requested
* this zone, we don't needlessly double-detect this condition.
*/
bitmap_set0(unreq_snat_zones, snat_req_node->data);
}

add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
snat_req_node->data, true,
snat_req_node->name);

bitmap_set1(ct_zone_bitmap, snat_req_node->data);
simap_put(ct_zones, snat_req_node->name, snat_req_node->data);
}

/* xxx This is wasteful to assign a zone to each port--even if no
Expand All @@ -592,22 +655,18 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
if (zone == MAX_CT_ZONES + 1) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
VLOG_WARN_RL(&rl, "exhausted all ct zones");
return;
break;
}
scan_start = zone + 1;

VLOG_DBG("assigning ct zone %"PRId32" to '%s'", zone, user);

struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
pending->state = CT_ZONE_OF_QUEUED;
pending->zone = zone;
pending->add = true;
shash_add(pending_ct_zones, user, pending);
add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
zone, true, user);

bitmap_set1(ct_zone_bitmap, zone);
simap_put(ct_zones, user, zone);
}

simap_destroy(&req_snat_zones);
sset_destroy(&all_users);
}

Expand Down Expand Up @@ -2330,6 +2389,7 @@ main(int argc, char *argv[])

engine_add_input(&en_ct_zones, &en_ovs_open_vswitch, NULL);
engine_add_input(&en_ct_zones, &en_ovs_bridge, NULL);
engine_add_input(&en_ct_zones, &en_sb_datapath_binding, NULL);
engine_add_input(&en_ct_zones, &en_runtime_data, NULL);

engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL);
Expand Down
2 changes: 1 addition & 1 deletion controller/physical.c
Expand Up @@ -926,7 +926,7 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,

ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100,
binding->header_.uuid.parts[0],
&match, ofpacts_p, &binding->header_.uuid);
&match, ofpacts_p, hc_uuid);
return;
}

Expand Down
7 changes: 7 additions & 0 deletions lib/ovn-util.c
Expand Up @@ -532,6 +532,13 @@ datapath_is_switch(const struct sbrec_datapath_binding *ldp)
{
return smap_get(&ldp->external_ids, "logical-switch") != NULL;
}

int
datapath_snat_ct_zone(const struct sbrec_datapath_binding *dp)
{
return smap_get_int(&dp->external_ids, "snat-ct-zone", -1);
}


struct tnlid_node {
struct hmap_node hmap_node;
Expand Down
1 change: 1 addition & 0 deletions lib/ovn-util.h
Expand Up @@ -107,6 +107,7 @@ uint32_t ovn_logical_flow_hash(const struct uuid *logical_datapath,
uint16_t priority,
const char *match, const char *actions);
bool datapath_is_switch(const struct sbrec_datapath_binding *);
int datapath_snat_ct_zone(const struct sbrec_datapath_binding *ldp);
void ovn_conn_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
const char *argv[] OVS_UNUSED, void *idl_);

Expand Down
10 changes: 10 additions & 0 deletions northd/ovn-northd.c
Expand Up @@ -1179,6 +1179,16 @@ ovn_datapath_update_external_ids(struct ovn_datapath *od)
smap_add(&ids, "interconn-ts", ts);
}
}

/* Set snat-ct-zone */
if (od->nbr) {
int nat_default_ct = smap_get_int(&od->nbr->options,
"snat-ct-zone", -1);
if (nat_default_ct >= 0) {
smap_add_format(&ids, "snat-ct-zone", "%d", nat_default_ct);
}
}

sbrec_datapath_binding_set_external_ids(od->sb, &ids);
smap_destroy(&ids);
}
Expand Down
7 changes: 7 additions & 0 deletions ovn-nb.xml
Expand Up @@ -1961,6 +1961,13 @@
unique key for each datapath by itself. However, if it is configured,
<code>ovn-northd</code> honors the configured value.
</column>
<column name="options" key="snat-ct-zone"
type='{"type": "integer", "minInteger": 0, "maxInteger": 65535}'>
Use the requested conntrack zone for SNAT with this router. This can be
useful if egress traffic from the host running OVN comes from both OVN
and other sources. This way, OVN and the other sources can make use of
the same conntrack zone.
</column>
</group>

<group title="Common Columns">
Expand Down
140 changes: 140 additions & 0 deletions tests/ovn.at
Expand Up @@ -22687,3 +22687,143 @@ AT_CHECK([test "$encap_rec_mvtep" = "$encap_rec_mvtep1"], [0], [])

OVN_CLEANUP([hv1])
AT_CLEANUP

AT_SETUP([ovn -- snat default ct zone])
ovn_start

net_add n1
sim_add hv1
check ovs-vsctl add-br br-phys
as hv1
ovn_attach n1 br-phys 192.168.0.10

check ovn-nbctl ls-add sw0
check ovn-nbctl lsp-add sw0 sw0-p1
check ovn-nbctl lsp-set-addresses sw0-p1 "00:00:00:00:00:02 10.0.0.2"

check ovn-nbctl lr-add gw_router
check ovn-nbctl set Logical_Router gw_router options:chassis="hv1"

check ovn-nbctl lrp-add gw_router gw_router-sw0 00:00:00:00:00:01 10.0.0.1/24
check ovn-nbctl lsp-add sw0 sw0-gw_router
check ovn-nbctl lsp-set-addresses sw0-gw_router router
check ovn-nbctl set Logical_Switch_Port sw0-gw_router type=router \
options:router-port=gw_router-sw0 \

check ovn-nbctl lr-nat-add gw_router snat 192.168.0.1 10.0.0.0/24

as hv1
check ovs-vsctl -- add-port br-int hv1-vif1 -- \
set interface hv1-vif1 external-ids:iface-id=sw0-p1

check ovn-nbctl --wait=hv sync

ro_nb_uuid=$(fetch_column nb:Logical_Router _uuid name=gw_router)
sw_nb_uuid=$(fetch_column nb:Logical_Switch _uuid name=sw0)
ro_sb_uuid=$(fetch_column Datapath_Binding _uuid external-ids:logical-router=${ro_nb_uuid})
sw_sb_uuid=$(fetch_column Datapath_Binding _uuid external-ids:logical-switch=${sw_nb_uuid})
ro_meta=$(fetch_column Datapath_Binding tunnel_key _uuid=${ro_sb_uuid})
ro_meta=$(printf %#x ${ro_meta})
sw_meta=$(fetch_column Datapath_Binding tunnel_key _uuid=${sw_sb_uuid})
sw_meta=$(printf %#x ${sw_meta})

echo "ro_nb_uuid: ${ro_nb_uuid}"
echo "sw_nb_uuid: ${sw_nb_uuid}"
echo "ro_sb_uuid: ${ro_sb_uuid}"
echo "sw_sb_uuid: ${sw_sb_uuid}"
echo "ro_meta: ${ro_meta}"
echo "sw_meta: ${sw_meta}"

echo "OVS bridge before requesting SNAT zone"
as hv1
ovs-vsctl list bridge br-int
ro_snat_zone=$(ovs-vsctl get bridge br-int external-ids:ct-zone-${ro_sb_uuid}_snat | tr -d \")
ro_snat_zone=$(printf %#x ${ro_snat_zone})
sw_snat_zone=$(ovs-vsctl get bridge br-int external-ids:ct-zone-${sw_sb_uuid}_snat | tr -d \")

echo "ro_snat_zone: ${ro_snat_zone}"
echo "sw_snat_zone: ${sw_snat_zone}"

as hv1 ovs-ofctl dump-flows br-int > offlows_pre
AT_CAPTURE_FILE([offlows_pre])
# We should have a flow in table 33 that transitions from the ingress pipeline
# to the egress pipeline of gw_router.
AT_CHECK_UNQUOTED([grep -c "table=33.*metadata=${ro_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" offlows_pre], [0], [dnl
1
])

# We should have a flow in table 65 that transitions from the egress pipeline
# of sw0 to the ingress pipeline of gw_router.
AT_CHECK_UNQUOTED([grep -c "table=65.*metadata=${sw_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" offlows_pre], [0], [dnl
1
])


# Request a specific zone for the gateway router. This should then get reflected
# both in the OVS database and in the flow table.
check ovn-nbctl --wait=hv set Logical_Router gw_router options:snat-ct-zone=666

echo "OVS bridge after requesting SNAT zone"
ovs-vsctl list bridge br-int

as hv1
ro_snat_zone=$(ovs-vsctl get bridge br-int external-ids:ct-zone-${ro_sb_uuid}_snat | tr -d \")

echo "ro_snat_zone: ${ro_snat_zone}"

AT_CHECK([test "${ro_snat_zone}" = "666"], [0], [])

ro_snat_zone=$(printf %#x "${ro_snat_zone}")

as hv1 ovs-ofctl dump-flows br-int > offlows_post
AT_CAPTURE_FILE([offlows_post])
# We should have a flow in table 33 that transitions from the ingress pipeline
# to the egress pipeline of gw_router.
AT_CHECK_UNQUOTED([grep -c "table=33.*metadata=${ro_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" offlows_post], [0], [dnl
1
])

# We should have a flow in table 65 that transitions from the egress pipeline
# of sw0 to the ingress pipeline of gw_router.
AT_CHECK_UNQUOTED([grep -c "table=65.*metadata=${sw_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" offlows_post], [0], [dnl
1
])

# Set router to use the zone that had been assigned to the switch. The router
# should claim the zone and the switch should change zones.
check ovn-nbctl --wait=hv set Logical_Router gw_router options:snat-ct-zone=${sw_snat_zone}

as hv1
ro_snat_zone=$(ovs-vsctl get bridge br-int external-ids:ct-zone-${ro_sb_uuid}_snat | tr -d \")

echo "ro_snat_zone: ${ro_snat_zone}"

AT_CHECK([test "${ro_snat_zone}" = "${sw_snat_zone}"], [0], [])

ro_snat_zone=$(printf %#x "${ro_snat_zone}")

echo "OVS bridge after stealing CT zone"
ovs-vsctl list bridge br-int

sw_new_snat_zone=$(ovs-vsctl get bridge br-int external-ids:ct-zone-${sw_sb_uuid}_snat | tr -d \")

echo "sw_new_snat_zone: ${sw_new_snat_zone}"

AT_CHECK([test "${sw_new_snat_zone}" != "${sw_snat_zone}"], [0], [])

as hv1 ovs-ofctl dump-flows br-int > offlows_stolen
AT_CAPTURE_FILE([offlows_stolen])
# We should have a flow in table 33 that transitions from the ingress pipeline
# to the egress pipeline of gw_router.
AT_CHECK_UNQUOTED([grep -c "table=33.*metadata=${ro_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" offlows_stolen], [0], [dnl
1
])

# We should have a flow in table 65 that transitions from the egress pipeline
# of sw0 to the ingress pipeline of gw_router.
AT_CHECK_UNQUOTED([grep -c "table=65.*metadata=${sw_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" offlows_stolen], [0], [dnl
1
])

OVN_CLEANUP([hv1])
AT_CLEANUP

0 comments on commit f9cab11

Please sign in to comment.