Skip to content

Commit

Permalink
Replace chassis mac with router port mac on destination chassis
Browse files Browse the repository at this point in the history
During E-W routing for vlan backed networks, we replace router port
mac with chassis mac, when packet leaves the source hypervisor.

As a result, the destination VM (on remote hypervisor) will see
chassis mac as source mac in received packet.

Although, functionality wise this does not cause any issue,
however chassis mac being see as source on VM, will
lead to following:
a. INCONSISTENT SOURCE MAC:
   If the destination VM moves to same hypervisor as sender,
   then it will see router port mac as source mac. Whereas, on
   a remote hypervisor, source mac will be the sender chassis mac.

   This will cause inconsistency in packet headers for the same
   flow and could be confusing for someone looking at packet
   captures inside the vm.

b. SYSTEM MAC BEING EXPOSED TO VM:
   Chassis mac is a CMS provided mac, i.e it is an infrastructure
   mac. It is not a good practice to expose such values to VM,
   which should not be seeing them in first place.

In order to replace chassis mac with router port mac, we will
do following.

a. Create conjunction for each chassis mac and router port vlan
   id combination. For example, for a 2 node chassis setup, where
   we have a logical router, connected to 4 logical switches with
   vlan ids: 2000, 1000, 0 and 24, the conjunction flow will look
   like following:

   cookie=0x0, duration=9094.608s, table=0, n_packets=0, n_bytes=0, idle_age=9094, priority=180,dl_src=aa:bb:cc:dd:ee:22 actions=conjunction(100,1/2)
   cookie=0x0, duration=9094.608s, table=0, n_packets=0, n_bytes=0, idle_age=9094, priority=180,dl_src=aa:bb:cc:dd:ff:ee actions=conjunction(100,1/2)

   cookie=0x0, duration=9094.552s, table=0, n_packets=0, n_bytes=0, idle_age=9094, priority=180,dl_vlan=2000 actions=conjunction(100,2/2)
   cookie=0x0, duration=9094.552s, table=0, n_packets=0, n_bytes=0, idle_age=9094, priority=180,dl_vlan=1000 actions=conjunction(100,2/2)
   cookie=0x0, duration=9094.552s, table=0, n_packets=0, n_bytes=0, idle_age=9094, priority=180,vlan_tci=0x0000/0x1fff actions=conjunction(100,2/2)
   cookie=0x0, duration=9094.552s, table=0, n_packets=0, n_bytes=0, idle_age=9094, priority=180,dl_vlan=24 actions=conjunction(100,2/2)

b. Using this conjunction as match, we can identify if packet entering destination
   hypervisor was routed at the source or not. This will be done in table=0 (Physical to logical)
   at priority=180.
   For example:
   cookie=0x0, duration=9795.957s, table=0, n_packets=1391, n_bytes=141882, idle_age=8396, priority=180,conj_id=100,in_port=146,dl_vlan=1000 actions=.........,mod_dl_src:00:00:01:01:02:03,...

c. We use conjunction, as it will ensure that we do not end up having lot of flows
   as we scale up. If we do not use conjunction, then we will have
   N (number of chassis macs) X M (number of router vlans) number of ovs flows.
   Conjunction converts it to N + M.
   Consider a setup, with 500 Chassis and 500 routed vlans.
   Without conjunction we will need 25000 (500 * 500) flows,
   whereas with conjunction that number comes down to 1000 (500 + 500).

Signed-off-by: Ankur Sharma <ankur.sharma@nutanix.com>
Signed-off-by: 0-day Robot <robot@bytheb.org>
  • Loading branch information
ankursharm authored and ovsrobot committed Sep 8, 2019
1 parent 0cb57d3 commit 35d93ef
Show file tree
Hide file tree
Showing 7 changed files with 243 additions and 12 deletions.
2 changes: 1 addition & 1 deletion controller/chassis.c
Expand Up @@ -144,7 +144,7 @@ get_bridge_mappings(const struct smap *ext_ids)
return smap_get_def(ext_ids, "ovn-bridge-mappings", "");
}

static const char *
const char *
get_chassis_mac_mappings(const struct smap *ext_ids)
{
return smap_get_def(ext_ids, "ovn-chassis-mac-mappings", "");
Expand Down
3 changes: 3 additions & 0 deletions controller/chassis.h
Expand Up @@ -27,6 +27,7 @@ struct sbrec_chassis;
struct sbrec_chassis_table;
struct sset;
struct eth_addr;
struct smap;

void chassis_register_ovs_idl(struct ovsdb_idl *);
const struct sbrec_chassis *chassis_run(
Expand All @@ -42,5 +43,7 @@ bool chassis_get_mac(const struct sbrec_chassis *chassis,
const char *bridge_mapping,
struct eth_addr *chassis_mac);
const char *chassis_get_id(void);
const char * get_chassis_mac_mappings(const struct smap *ext_ids);


#endif /* controller/chassis.h */
5 changes: 5 additions & 0 deletions controller/ovn-controller.c
Expand Up @@ -1268,9 +1268,14 @@ en_flow_output_run(struct engine_node *node)
(struct sbrec_port_binding_table *)EN_OVSDB_GET(
engine_get_input("SB_port_binding", node));

struct sbrec_chassis_table *chassis_table =
(struct sbrec_chassis_table *)EN_OVSDB_GET(
engine_get_input("SB_chassis", node));

physical_run(sbrec_port_binding_by_name,
multicast_group_table,
port_binding_table,
chassis_table,
mff_ovn_geneve,
br_int, chassis, ct_zones,
local_datapaths, local_lports,
Expand Down
222 changes: 216 additions & 6 deletions controller/physical.c
Expand Up @@ -47,9 +47,23 @@

VLOG_DEFINE_THIS_MODULE(physical);

/* Datapath zone IDs for connection tracking and NAT */
struct zone_ids {
int ct; /* MFF_LOG_CT_ZONE. */
int dnat; /* MFF_LOG_DNAT_ZONE. */
int snat; /* MFF_LOG_SNAT_ZONE. */
};

static void
load_logical_ingress_metadata(const struct sbrec_port_binding *binding,
const struct zone_ids *zone_ids,
struct ofpbuf *ofpacts_p);

/* UUID to identify OF flows not associated with ovsdb rows. */
static struct uuid *hc_uuid = NULL;

#define CHASSIS_MAC_TO_ROUTER_MAC_CONJID 100

void
physical_register_ovs_idl(struct ovsdb_idl *ovs_idl)
{
Expand Down Expand Up @@ -199,12 +213,6 @@ get_localnet_port(const struct hmap *local_datapaths, int64_t tunnel_key)
return ld ? ld->localnet_port : NULL;
}

/* Datapath zone IDs for connection tracking and NAT */
struct zone_ids {
int ct; /* MFF_LOG_CT_ZONE. */
int dnat; /* MFF_LOG_DNAT_ZONE. */
int snat; /* MFF_LOG_SNAT_ZONE. */
};

static struct zone_ids
get_zone_ids(const struct sbrec_port_binding *binding,
Expand Down Expand Up @@ -385,6 +393,200 @@ put_remote_port_redirect_overlay(const struct
match, ofpacts_p, &binding->header_.uuid);
}


static struct hmap remote_chassis_macs =
HMAP_INITIALIZER(&remote_chassis_macs);

/* Maps from a physical network name to the chassis macs of remote chassis. */
struct remote_chassis_mac {
struct hmap_node hmap_node;
char *chassis_mac;
char *chassis_id;
};

static void
populate_remote_chassis_macs(const struct sbrec_chassis *my_chassis,
const struct sbrec_chassis_table *chassis_table)
{
const struct sbrec_chassis *chassis;
SBREC_CHASSIS_TABLE_FOR_EACH (chassis, chassis_table) {

/* We want only remote chassis macs. */
if (!strcmp(my_chassis->name, chassis->name)) {
continue;
}

const char *tokens
= get_chassis_mac_mappings(&chassis->external_ids);

if (!strlen(tokens)) {
continue;
}

char *save_ptr = NULL;
char *token;
char *tokstr = xstrdup(tokens);

/* Format for a chassis mac configuration is:
* ovn-chassis-mac-mappings="bridge-name1:MAC1,bridge-name2:MAC2"
*/
for (token = strtok_r(tokstr, ",", &save_ptr);
token != NULL;
token = strtok_r(NULL, ",", &save_ptr)) {
char *save_ptr2 = NULL;
char *chassis_mac_bridge = strtok_r(token, ":", &save_ptr2);
char *chassis_mac_str = strtok_r(NULL, "", &save_ptr2);
struct remote_chassis_mac *remote_chassis_mac = NULL;
remote_chassis_mac = xmalloc(sizeof *remote_chassis_mac);
hmap_insert(&remote_chassis_macs, &remote_chassis_mac->hmap_node,
hash_string(chassis_mac_bridge, 0));
remote_chassis_mac->chassis_mac = xstrdup(chassis_mac_str);
remote_chassis_mac->chassis_id = xstrdup(chassis->name);
}
free(tokstr);
}
}

static void
free_remote_chassis_macs(void)
{
struct remote_chassis_mac *mac, *next_mac;

HMAP_FOR_EACH_SAFE (mac, next_mac, hmap_node, &remote_chassis_macs) {
hmap_remove(&remote_chassis_macs, &mac->hmap_node);
free(mac->chassis_mac);
free(mac->chassis_id);
free(mac);
}
}

static void
put_chassis_mac_conj_id_flow(const struct sbrec_chassis_table *chassis_table,
const struct sbrec_chassis *chassis,
struct ofpbuf *ofpacts_p,
struct ovn_desired_flow_table *flow_table)
{
struct match match;
struct remote_chassis_mac *mac;

populate_remote_chassis_macs(chassis, chassis_table);

HMAP_FOR_EACH (mac, hmap_node, &remote_chassis_macs) {
struct eth_addr chassis_mac;
char *err_str = NULL;
struct ofpact_conjunction *conj;

if ((err_str = str_to_mac(mac->chassis_mac, &chassis_mac))) {
free(err_str);
free_remote_chassis_macs();
return;
}

ofpbuf_clear(ofpacts_p);
match_init_catchall(&match);


match_set_dl_src(&match, chassis_mac);

conj = ofpact_put_CONJUNCTION(ofpacts_p);
conj->id = CHASSIS_MAC_TO_ROUTER_MAC_CONJID;
conj->n_clauses = 2;
conj->clause = 0;
ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 180, 0,
&match, ofpacts_p, hc_uuid);
}

free_remote_chassis_macs();
}

static void
put_replace_chassis_mac_flows(const struct simap *ct_zones,
const struct
sbrec_port_binding *localnet_port,
const struct hmap *local_datapaths,
struct ofpbuf *ofpacts_p,
ofp_port_t ofport,
struct ovn_desired_flow_table *flow_table)
{
/* Packets arriving on localnet port, could have been routed on
* source chassis and hence will have a chassis mac.
* conj_match will match source mac with chassis macs conjunction
* and replace it with corresponding router port mac.
*/
struct local_datapath *ld = get_local_datapath(local_datapaths,
localnet_port->datapath->
tunnel_key);
ovs_assert(ld);

int tag = localnet_port->tag ? *localnet_port->tag : 0;
struct zone_ids zone_ids = get_zone_ids(localnet_port, ct_zones);

for (int i = 0; i < ld->n_peer_ports; i++) {
const struct sbrec_port_binding *rport_binding = ld->peer_ports[i];
struct eth_addr router_port_mac;
char *err_str = NULL;
struct match match;
struct ofpact_mac *replace_mac;

ovs_assert(rport_binding->n_mac == 1);
if ((err_str = str_to_mac(rport_binding->mac[0], &router_port_mac))) {
/* Parsing of mac failed. */
VLOG_WARN("Parsing or router port mac failed for router port: %s, "
"with error: %s", rport_binding->logical_port, err_str);
free(err_str);
return;
}
ofpbuf_clear(ofpacts_p);
match_init_catchall(&match);

/* Add flow, which will match on conjunction id and will
* replace source with router port mac */

/* Match on ingress port, vlan_id and conjunction id */
match_set_in_port(&match, ofport);
match_set_conj_id(&match, CHASSIS_MAC_TO_ROUTER_MAC_CONJID);

if (tag) {
match_set_dl_vlan(&match, htons(tag), 0);
} else {
match_set_dl_tci_masked(&match, 0, htons(VLAN_CFI));
}

/* Actions */

if (tag) {
ofpact_put_STRIP_VLAN(ofpacts_p);
}
load_logical_ingress_metadata(localnet_port, &zone_ids, ofpacts_p);
replace_mac = ofpact_put_SET_ETH_SRC(ofpacts_p);
replace_mac->mac = router_port_mac;

/* Resubmit to first logical ingress pipeline table. */
put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG,
180, 0, &match, ofpacts_p, hc_uuid);

/* Provide second search criteria, i.e localnet port's
* vlan ID for conjunction flow */
struct ofpact_conjunction *conj;
ofpbuf_clear(ofpacts_p);
match_init_catchall(&match);

if (tag) {
match_set_dl_vlan(&match, htons(tag), 0);
} else {
match_set_dl_tci_masked(&match, 0, htons(VLAN_CFI));
}

conj = ofpact_put_CONJUNCTION(ofpacts_p);
conj->id = CHASSIS_MAC_TO_ROUTER_MAC_CONJID;
conj->n_clauses = 2;
conj->clause = 1;
ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 180, 0, &match,
ofpacts_p, hc_uuid);
}
}

static void
put_replace_router_port_mac_flows(struct ovsdb_idl_index
*sbrec_port_binding_by_name,
Expand Down Expand Up @@ -931,6 +1133,11 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
&binding->header_.uuid);
}

if (!strcmp(binding->type, "localnet")) {
put_replace_chassis_mac_flows(ct_zones, binding, local_datapaths,
ofpacts_p, ofport, flow_table);
}

/* Table 65, Priority 100.
* =======================
*
Expand Down Expand Up @@ -1224,6 +1431,7 @@ void
physical_run(struct ovsdb_idl_index *sbrec_port_binding_by_name,
const struct sbrec_multicast_group_table *multicast_group_table,
const struct sbrec_port_binding_table *port_binding_table,
const struct sbrec_chassis_table *chassis_table,
enum mf_field_id mff_ovn_geneve,
const struct ovsrec_bridge *br_int,
const struct sbrec_chassis *chassis,
Expand Down Expand Up @@ -1369,6 +1577,8 @@ physical_run(struct ovsdb_idl_index *sbrec_port_binding_by_name,
struct ofpbuf ofpacts;
ofpbuf_init(&ofpacts, 0);

put_chassis_mac_conj_id_flow(chassis_table, chassis, &ofpacts, flow_table);

/* Set up flows in table 0 for physical-to-logical translation and in table
* 64 for logical-to-physical translation. */
const struct sbrec_port_binding *binding;
Expand Down
1 change: 1 addition & 0 deletions controller/physical.h
Expand Up @@ -46,6 +46,7 @@ void physical_register_ovs_idl(struct ovsdb_idl *);
void physical_run(struct ovsdb_idl_index *sbrec_port_binding_by_name,
const struct sbrec_multicast_group_table *,
const struct sbrec_port_binding_table *,
const struct sbrec_chassis_table *chassis_table,
enum mf_field_id mff_ovn_geneve,
const struct ovsrec_bridge *br_int,
const struct sbrec_chassis *chassis,
Expand Down
10 changes: 6 additions & 4 deletions ovn-architecture.7.xml
Expand Up @@ -1426,7 +1426,7 @@
(MAC,VLAN) tuple will seen by physical network from other chassis as
well, which could cause these issues:
</p>

<ul>
<li>
Continuous MAC moves in top-of-rack switch (ToR).
Expand All @@ -1442,9 +1442,11 @@

<li>
The destination chassis receives the packet via the localnet port and
sends it to the integration bridge. The packet enters the
ingress pipeline and then egress pipeline of the destination localnet
logical switch and finally gets delivered to the destination VM port.
sends it to the integration bridge. Before entering the integration
bridge the source mac of the packet will be replaced with
router port mac again. The packet enters the ingress pipeline and
then egress pipeline of the destination localnet logical switch and
finally gets delivered to the destination VM port.
</li>
</ol>

Expand Down
12 changes: 11 additions & 1 deletion tests/ovn.at
Expand Up @@ -14278,6 +14278,14 @@ hv_to_chassis_mac () {
esac
}

lrp_to_lrp_mac () {
case $1 in dnl (
router-to-ls[[1]]) echo 000001010203 ;; dnl (
router-to-ls[[2]]) echo 000001010205 ;; dnl (
*) AT_FAIL_IF([:]) ;;
esac
}

ip_to_hex() {
printf "%02x%02x%02x%02x" "$@"
}
Expand Down Expand Up @@ -14345,7 +14353,9 @@ test_ip() {
# (and checksum).
outport_num=`vif_to_num $outport`
out_lrp=`vif_to_lrp $outport`
echo f000000000${outport_num}aabbccddee${hv_num}${hv_num}08004500001c00000000"3f1101"00${src_ip}${dst_ip}0035111100080000
lrp_mac=`lrp_to_lrp_mac $out_lrp`
echo f000000000${outport_num}${lrp_mac}08004500001c00000000"3f1101"00${src_ip}${dst_ip}0035111100080000

fi >> $outport.expected
done
}
Expand Down

0 comments on commit 35d93ef

Please sign in to comment.