Skip to content

Commit

Permalink
chassis: Do not try to guess system-id changes.
Browse files Browse the repository at this point in the history
When the OVS system-id changes ovn-controller needs external (CMS) help
in order to update its own Chassis/Chassis_private records, i.e., the
CMS has to ensure that either ovn-controller is stopped (so that
ovn-controller cleans up its old Chassis/Chassis_private records) or
that after the system-id is changed, the stale Chassis/Chassis_private
records are destroyed externally.

This patch reverts the previous efforts to have ovn-controller reuse
stale Chassis records and documents how the system-id change operation
needs to be executed.  The main problem with reusing stale records is
that there's no safe way to make it work when RBAC is enabled.

Suggestedy-by: Han Zhou <hzhou@ovn.org>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374608.html
Fixes: f26c4a5 ("ovn-controller: Fix chassis ovn-sbdb record init")
Fixes: 4465f55 ("ovn-controller: Update stale chassis entry at init")
Fixes: 94a32fc ("chassis: Fix the way encaps are updated for a chassis record.")
Fixes: dce1af3 ("chassis: Fix chassis_private record updates when the system-id changes.")
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Mark Michelson <mmichels@redhat.com>
(cherry picked from commit fc359bf)
Signed-off-by: Numan Siddique <numans@ovn.org>
  • Loading branch information
dceara authored and numansiddique committed Dec 17, 2020
1 parent 6d81c67 commit 5230c9f
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 150 deletions.
132 changes: 10 additions & 122 deletions controller/chassis.c
Expand Up @@ -36,44 +36,6 @@ VLOG_DEFINE_THIS_MODULE(chassis);
#define HOST_NAME_MAX 255
#endif /* HOST_NAME_MAX */

/*
* Structure to hold chassis specific state (currently just chassis-id)
* to avoid database lookups when changes happen while the controller is
* running.
*/
struct chassis_info {
/* Last ID we initialized the Chassis SB record with. */
struct ds id;

/* True if Chassis SB record is initialized, false otherwise. */
uint32_t id_inited : 1;
};

static struct chassis_info chassis_state = {
.id = DS_EMPTY_INITIALIZER,
.id_inited = false,
};

static void
chassis_info_set_id(struct chassis_info *info, const char *id)
{
ds_clear(&info->id);
ds_put_cstr(&info->id, id);
info->id_inited = true;
}

static bool
chassis_info_id_inited(const struct chassis_info *info)
{
return info->id_inited;
}

static const char *
chassis_info_id(const struct chassis_info *info)
{
return ds_cstr_ro(&info->id);
}

/*
* Structure for storing the chassis config parsed from the ovs table.
*/
Expand Down Expand Up @@ -400,6 +362,9 @@ chassis_tunnels_changed(const struct sset *encap_type_set,
bool changed = false;

for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) {
return true;
}

if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) {
changed = true;
Expand Down Expand Up @@ -480,54 +445,11 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
return encaps;
}

/*
* Updates encaps for a given chassis. This can happen when the chassis
* name has changed. Also, the only thing we support updating is the
* chassis_name. For other changes the encaps will be recreated.
*/
static void
chassis_update_encaps(const struct sbrec_chassis *chassis)
{
for (size_t i = 0; i < chassis->n_encaps; i++) {
sbrec_encap_set_chassis_name(chassis->encaps[i], chassis->name);
}
}

/*
* Returns a pointer to a chassis record from 'chassis_table' that
* matches at least one tunnel config.
*/
static const struct sbrec_chassis *
chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
const struct ovs_chassis_cfg *ovs_cfg,
const char *chassis_id)
{
const struct sbrec_chassis *chassis_rec;

SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) {
for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
if (sset_contains(&ovs_cfg->encap_type_set,
chassis_rec->encaps[i]->type) &&
sset_contains(&ovs_cfg->encap_ip_set,
chassis_rec->encaps[i]->ip)) {
return chassis_rec;
}
if (strcmp(chassis_rec->name, chassis_id) == 0) {
return chassis_rec;
}
}
}

return NULL;
}

/* If this is a chassis config update after we initialized the record once
* then we should always be able to find it with the ID we saved in
* chassis_state.
* Otherwise (i.e., first time we create the record or if the system-id
* changed) then we check if there's a stale record from a previous
* controller run that didn't end gracefully and reuse it. If not then we
* create a new record.
* changed) we create a new record.
*
* Sets '*chassis_rec' to point to the local chassis record.
* Returns true if this record was already in the database, false if it was
Expand All @@ -536,33 +458,16 @@ chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
static bool
chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_index *sbrec_chassis_by_name,
const struct sbrec_chassis_table *chassis_table,
const struct ovs_chassis_cfg *ovs_cfg,
const char *chassis_id,
const struct sbrec_chassis **chassis_rec)
{
const struct sbrec_chassis *chassis = NULL;

if (chassis_info_id_inited(&chassis_state)) {
chassis = chassis_lookup_by_name(sbrec_chassis_by_name,
chassis_info_id(&chassis_state));
if (!chassis) {
VLOG_DBG("Could not find Chassis, will check if the id changed: "
"stored (%s) ovs (%s)",
chassis_info_id(&chassis_state), chassis_id);
}
}

if (!chassis) {
chassis = chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
}
const struct sbrec_chassis *chassis =
chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);

if (!chassis) {
/* Recreate the chassis record. */
if (!chassis && ovnsb_idl_txn) {
/* Create the chassis record. */
VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id);
if (ovnsb_idl_txn) {
*chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
}
*chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
return false;
}

Expand Down Expand Up @@ -628,7 +533,6 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
&ovs_cfg->encap_ip_set, ovs_cfg->encap_csum,
chassis_rec);
if (!tunnels_changed) {
chassis_update_encaps(chassis_rec);
return updated;
}

Expand All @@ -649,7 +553,6 @@ const struct sbrec_chassis *
chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_index *sbrec_chassis_by_name,
const struct ovsrec_open_vswitch_table *ovs_table,
const struct sbrec_chassis_table *chassis_table,
const char *chassis_id,
const struct ovsrec_bridge *br_int,
const struct sset *transport_zones)
Expand All @@ -664,8 +567,7 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,

const struct sbrec_chassis *chassis_rec = NULL;
bool existed = chassis_get_record(ovnsb_idl_txn, sbrec_chassis_by_name,
chassis_table, &ovs_cfg, chassis_id,
&chassis_rec);
chassis_id, &chassis_rec);

/* If we found (or created) a record, update it with the correct config
* and store the current chassis_id for fast lookup in case it gets
Expand All @@ -675,7 +577,6 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
bool updated = chassis_update(chassis_rec, ovnsb_idl_txn, &ovs_cfg,
chassis_id, transport_zones);

chassis_info_set_id(&chassis_state, chassis_id);
if (!existed || updated) {
ovsdb_idl_txn_add_comment(ovnsb_idl_txn,
"ovn-controller: %s chassis '%s'",
Expand Down Expand Up @@ -750,16 +651,3 @@ chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
}
return false;
}

/*
* Returns the last initialized chassis-id.
*/
const char *
chassis_get_id(void)
{
if (chassis_info_id_inited(&chassis_state)) {
return chassis_info_id(&chassis_state);
}

return NULL;
}
2 changes: 0 additions & 2 deletions controller/chassis.h
Expand Up @@ -34,15 +34,13 @@ const struct sbrec_chassis *chassis_run(
struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_index *sbrec_chassis_by_name,
const struct ovsrec_open_vswitch_table *,
const struct sbrec_chassis_table *,
const char *chassis_id, const struct ovsrec_bridge *br_int,
const struct sset *transport_zones);
bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
const struct sbrec_chassis *);
bool chassis_get_mac(const struct sbrec_chassis *chassis,
const char *bridge_mapping,
struct eth_addr *chassis_mac);
const char *chassis_get_id(void);
const char * get_chassis_mac_mappings(const struct smap *ext_ids);


Expand Down
7 changes: 6 additions & 1 deletion controller/ovn-controller.8.xml
Expand Up @@ -68,7 +68,12 @@
</p>
<dl>
<dt><code>external_ids:system-id</code></dt>
<dd>The chassis name to use in the Chassis table.</dd>
<dd>The chassis name to use in the Chassis table. Changing the
<code>system-id</code> while <code>ovn-controller</code> is running is
not directly supported. Users have two options: either first
gracefully stop <code>ovn-controller</code> or manually delete the
stale <code>Chassis</code> record after changing the
<code>system-id</code>.</dd>

<dt><code>external_ids:hostname</code></dt>
<dd>The hostname to use in the Chassis table.</dd>
Expand Down
20 changes: 11 additions & 9 deletions controller/ovn-controller.c
Expand Up @@ -1543,7 +1543,7 @@ static void init_physical_ctx(struct engine_node *node,
(struct ovsrec_bridge_table *)EN_OVSDB_GET(
engine_get_input("OVS_bridge", node));
const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
const char *chassis_id = chassis_get_id();
const char *chassis_id = get_ovs_chassis_id(ovs_table);
const struct sbrec_chassis *chassis = NULL;
struct ovsdb_idl_index *sbrec_chassis_by_name =
engine_ovsdb_node_get_index(
Expand Down Expand Up @@ -1619,7 +1619,11 @@ static void init_lflow_ctx(struct engine_node *node,
(struct sbrec_multicast_group_table *)EN_OVSDB_GET(
engine_get_input("SB_multicast_group", node));

const char *chassis_id = chassis_get_id();
struct ovsrec_open_vswitch_table *ovs_table =
(struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
engine_get_input("OVS_open_vswitch", node));

const char *chassis_id = get_ovs_chassis_id(ovs_table);
const struct sbrec_chassis *chassis = NULL;
struct ovsdb_idl_index *sbrec_chassis_by_name =
engine_ovsdb_node_get_index(
Expand Down Expand Up @@ -1700,7 +1704,7 @@ en_flow_output_run(struct engine_node *node, void *data)
(struct ovsrec_bridge_table *)EN_OVSDB_GET(
engine_get_input("OVS_bridge", node));
const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
const char *chassis_id = chassis_get_id();
const char *chassis_id = get_ovs_chassis_id(ovs_table);

struct ovsdb_idl_index *sbrec_chassis_by_name =
engine_ovsdb_node_get_index(
Expand Down Expand Up @@ -1858,7 +1862,7 @@ _flow_output_resource_ref_handler(struct engine_node *node, void *data,
(struct ovsrec_bridge_table *)EN_OVSDB_GET(
engine_get_input("OVS_bridge", node));
const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
const char *chassis_id = chassis_get_id();
const char *chassis_id = get_ovs_chassis_id(ovs_table);

struct ovsdb_idl_index *sbrec_chassis_by_name =
engine_ovsdb_node_get_index(
Expand Down Expand Up @@ -2367,16 +2371,14 @@ main(int argc, char *argv[])
ovsrec_bridge_table_get(ovs_idl_loop.idl);
const struct ovsrec_open_vswitch_table *ovs_table =
ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
const struct sbrec_chassis_table *chassis_table =
sbrec_chassis_table_get(ovnsb_idl_loop.idl);
const struct ovsrec_bridge *br_int =
process_br_int(ovs_idl_txn, bridge_table, ovs_table);
const char *chassis_id = get_ovs_chassis_id(ovs_table);
const struct sbrec_chassis *chassis = NULL;
if (chassis_id) {
chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name,
ovs_table, chassis_table, chassis_id,
br_int, &transport_zones);
ovs_table, chassis_id, br_int,
&transport_zones);
}

if (br_int) {
Expand Down Expand Up @@ -2603,7 +2605,7 @@ main(int argc, char *argv[])

const struct ovsrec_bridge *br_int = get_br_int(bridge_table,
ovs_table);
const char *chassis_id = chassis_get_id();
const char *chassis_id = get_ovs_chassis_id(ovs_table);
const struct sbrec_chassis *chassis
= (chassis_id
? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
Expand Down
29 changes: 13 additions & 16 deletions tests/ovn-controller.at
Expand Up @@ -187,30 +187,27 @@ OVS_WAIT_UNTIL([
test "${expected_iface_types}" = "${chassis_iface_types}"
])

# Change the value of external_ids:system-id and make sure it's mirrored
# in the Chassis record in the OVN_Southbound database.
# Change the value of external_ids:system-id.
# This requires operator intervention and removal of the stale chassis record.
# Until that happens ovn-controller fails to create the records due to
# constraint violation on the Encap table.
sysid=${sysid}-foo
ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"

OVS_WAIT_UNTIL([
chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
test "${sysid}" = "${chassis_id}"
grep -q 'Transaction causes multiple rows in \\"Encap\\" table to have identical values (geneve and \\"192.168.0.1\\") for index on columns \\"type\\" and \\"ip\\".' hv/ovn-controller.log
])

# Simulate system-id changing while ovn-controller is disconnected from the
# SB.
valid_remote=$(ovs-vsctl get Open_vSwitch . external_ids:ovn-remote)
invalid_remote=tcp:0.0.0.0:4242
ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${invalid_remote}
expected_state="not connected"
# Destroy the stale entry manually and ovn-controller should now be able
# to create new ones.
ovn-sbctl destroy chassis .
OVS_WAIT_UNTIL([
test "${expected_state}" = "$(ovn-appctl -t ovn-controller connection-status)"
test $(ovn-sbctl --columns _uuid find chassis name=${sysid} | wc -l) -eq 1
])
sysid=${sysid}-bar
ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${valid_remote}

# Only one Chassis record should exist.
OVS_WAIT_UNTIL([
chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
test "${sysid}" = "${chassis_id}"
test $(ovn-sbctl --columns _uuid list chassis | wc -l) -eq 1
])

# Gracefully terminate daemons
Expand Down

0 comments on commit 5230c9f

Please sign in to comment.