Skip to content

Commit

Permalink
qos: fix potential double deletion of ovs idl row
Browse files Browse the repository at this point in the history
If an interface with an qos option is deleted at the same
time as an ofport notification from ovs (causing runtime_data recompute)
is received, the binding module was trying to delete twice the same qos
queue, causing ovs to raise an exception.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2213219
Fixes: 7d1d111 ("controller: configure qos through ovs qos table and do not run tc directly")
Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
Acked-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
  • Loading branch information
simonartxavier authored and dceara committed Jul 17, 2023
1 parent ee6d1ac commit 0794a6e
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 18 deletions.
22 changes: 22 additions & 0 deletions controller/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -396,9 +396,23 @@ configure_qos(const struct sbrec_port_binding *pb,
q->burst = burst;
}

static const struct ovsrec_queue *
find_qos_queue_by_external_ids(const struct smap *external_ids,
struct ovsdb_idl_index *ovsrec_queue_by_external_ids)
{
const struct ovsrec_queue *queue =
ovsrec_queue_index_init_row(ovsrec_queue_by_external_ids);
ovsrec_queue_index_set_external_ids(queue, external_ids);
const struct ovsrec_queue *retval =
ovsrec_queue_index_find(ovsrec_queue_by_external_ids, queue);
ovsrec_queue_index_destroy_row(queue);
return retval;
}

static void
ovs_qos_entries_gc(struct ovsdb_idl_txn *ovs_idl_txn,
struct ovsdb_idl_index *ovsrec_port_by_qos,
struct ovsdb_idl_index *ovsrec_queue_by_external_ids,
const struct ovsrec_qos_table *qos_table,
struct hmap *queue_map)
{
Expand All @@ -414,6 +428,13 @@ ovs_qos_entries_gc(struct ovsdb_idl_txn *ovs_idl_txn,
if (!queue) {
continue;
}
const struct ovsrec_queue *ovsrec_queue =
find_qos_queue_by_external_ids(&queue->external_ids,
ovsrec_queue_by_external_ids);
if (!ovsrec_queue) {
VLOG_DBG("queue already deleted !");
continue;
}

const char *port = smap_get(&queue->external_ids, "ovn_port");
if (!port) {
Expand Down Expand Up @@ -2173,6 +2194,7 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
shash_destroy(&bridge_mappings);
/* Remove stale QoS entries. */
ovs_qos_entries_gc(b_ctx_in->ovs_idl_txn, b_ctx_in->ovsrec_port_by_qos,
b_ctx_in->ovsrec_queue_by_external_ids,
b_ctx_in->qos_table, b_ctx_out->qos_map);

cleanup_claimed_port_timestamps();
Expand Down
1 change: 1 addition & 0 deletions controller/binding.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ struct binding_ctx_in {
struct ovsdb_idl_index *sbrec_port_binding_by_datapath;
struct ovsdb_idl_index *sbrec_port_binding_by_name;
struct ovsdb_idl_index *ovsrec_port_by_qos;
struct ovsdb_idl_index *ovsrec_queue_by_external_ids;
const struct ovsrec_qos_table *qos_table;
const struct sbrec_port_binding_table *port_binding_table;
const struct ovsrec_bridge *br_int;
Expand Down
12 changes: 12 additions & 0 deletions controller/ovn-controller.c
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,7 @@ enum sb_engine_node {
OVS_NODE(port, "port") \
OVS_NODE(interface, "interface") \
OVS_NODE(qos, "qos") \
OVS_NODE(queue, "queue") \
OVS_NODE(flow_sample_collector_set, "flow_sample_collector_set")

enum ovs_engine_node {
Expand Down Expand Up @@ -1588,6 +1589,10 @@ init_binding_ctx(struct engine_node *node,
engine_ovsdb_node_get_index(
engine_get_input("OVS_port", node), "qos");

struct ovsdb_idl_index *ovsrec_queue_by_external_ids =
engine_ovsdb_node_get_index(
engine_get_input("OVS_queue", node), "external_ids");

struct controller_engine_ctx *ctrl_ctx = engine_get_context()->client_ctx;

b_ctx_in->ovnsb_idl_txn = engine_get_context()->ovnsb_idl_txn;
Expand All @@ -1596,6 +1601,7 @@ init_binding_ctx(struct engine_node *node,
b_ctx_in->sbrec_port_binding_by_datapath = sbrec_port_binding_by_datapath;
b_ctx_in->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
b_ctx_in->ovsrec_port_by_qos = ovsrec_port_by_qos;
b_ctx_in->ovsrec_queue_by_external_ids = ovsrec_queue_by_external_ids;
b_ctx_in->iface_table = iface_shadow->iface_table;
b_ctx_in->iface_table_external_ids_old =
&iface_shadow->iface_table_external_ids_old;
Expand Down Expand Up @@ -4611,6 +4617,9 @@ main(int argc, char *argv[])
struct ovsdb_idl_index *ovsrec_port_by_qos
= ovsdb_idl_index_create1(ovs_idl_loop.idl,
&ovsrec_port_col_qos);
struct ovsdb_idl_index *ovsrec_queue_by_external_ids
= ovsdb_idl_index_create1(ovs_idl_loop.idl,
&ovsrec_queue_col_external_ids);
struct ovsdb_idl_index *ovsrec_flow_sample_collector_set_by_id
= ovsdb_idl_index_create2(ovs_idl_loop.idl,
&ovsrec_flow_sample_collector_set_col_bridge,
Expand Down Expand Up @@ -4911,6 +4920,7 @@ main(int argc, char *argv[])
engine_add_input(&en_runtime_data, &en_ovs_open_vswitch, NULL);
engine_add_input(&en_runtime_data, &en_ovs_bridge, NULL);
engine_add_input(&en_runtime_data, &en_ovs_qos, NULL);
engine_add_input(&en_runtime_data, &en_ovs_queue, NULL);

engine_add_input(&en_runtime_data, &en_sb_chassis, NULL);
engine_add_input(&en_runtime_data, &en_sb_datapath_binding,
Expand Down Expand Up @@ -4972,6 +4982,8 @@ main(int argc, char *argv[])
engine_ovsdb_node_add_index(&en_ovs_flow_sample_collector_set, "id",
ovsrec_flow_sample_collector_set_by_id);
engine_ovsdb_node_add_index(&en_ovs_port, "qos", ovsrec_port_by_qos);
engine_ovsdb_node_add_index(&en_ovs_queue, "external_ids",
ovsrec_queue_by_external_ids);

struct ed_type_lflow_output *lflow_output_data =
engine_get_internal_data(&en_lflow_output);
Expand Down
34 changes: 34 additions & 0 deletions tests/ovn-macros.at
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,40 @@ fmt_pkt() {
print(out.decode())" | $PYTHON3
}

sleep_sb() {
echo SB going to sleep
AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)])
}
wake_up_sb() {
echo SB waking up
AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)])
}
sleep_controller() {
echo Controller $hv going to sleep
hv=$1
as $hv
check ovn-appctl debug/pause
OVS_WAIT_UNTIL([test x$(ovn-appctl -t ovn-controller debug/status) = "xpaused"])
}
wake_up_controller() {
hv=$1
as $hv
echo Controller $hv waking up
ovn-appctl debug/resume
OVS_WAIT_UNTIL([test x$(ovn-appctl -t ovn-controller debug/status) = "xrunning"])
}
sleep_ovs() {
hv=$1
echo ovs $hv going to sleep
AT_CHECK([kill -STOP $(cat $hv/ovs-vswitchd.pid)])
}

wake_up_ovs() {
hv=$1
echo ovs $hv going to sleep
AT_CHECK([kill -CONT $(cat $hv/ovs-vswitchd.pid)])
}

OVS_END_SHELL_HELPERS

m4_define([OVN_POPULATE_ARP], [AT_CHECK(ovn_populate_arp__, [0], [ignore])])
Expand Down
70 changes: 70 additions & 0 deletions tests/ovn.at
Original file line number Diff line number Diff line change
Expand Up @@ -36490,3 +36490,73 @@ OVN_CHECK_PACKETS_REMOVE_BROADCAST([hv2/vif1-tx.pcap], [expected])

AT_CLEANUP
])

OVN_FOR_EACH_NORTHD([
AT_SETUP([OVN QoS port deletion])
ovn_start

check ovn-nbctl ls-add ls1
check ovn-nbctl lsp-add ls1 public1
check ovn-nbctl lsp-set-addresses public1 unknown
check ovn-nbctl lsp-set-type public1 localnet
check ovn-nbctl lsp-set-options public1 network_name=phys
net_add n

# two hypervisors, each connected to the same network
for i in 1 2; do
sim_add hv-$i
as hv-$i
ovs-vsctl add-br br-phys
ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
ovn_attach n br-phys 192.168.0.$i
done

check ovn-nbctl lsp-add ls1 lsp1
check ovn-nbctl lsp-set-addresses lsp1 f0:00:00:00:00:03
as hv-1
ovs-vsctl add-port br-int vif1 -- \
set Interface vif1 external-ids:iface-id=lsp1 \
ofport-request=3

OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up lsp1` = xup])

check ovn-nbctl set Logical_Switch_Port lsp1 options:qos_max_rate=800000
check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 options:qos_burst=9000000

AS_BOX([$(date +%H:%M:%S.%03N) checking deletion of port with qos options])
check ovn-nbctl ls-add ls2
check ovn-nbctl lsp-add ls2 lsp2
check ovn-nbctl lsp-set-addresses lsp2 f0:00:00:00:00:05
as hv-1
ovs-vsctl add-port br-int vif2 -- \
set Interface vif2 external-ids:iface-id=lsp2 \
ofport-request=5
OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up lsp2` = xup])

# Sleep ovs to postpone ofport notification to ovn
sleep_ovs hv-1

# Create localnet; this will cause patch-port creation
check ovn-nbctl lsp-add ls2 public2
check ovn-nbctl lsp-set-addresses public2 unknown
check ovn-nbctl lsp-set-type public2 localnet
check ovn-nbctl --wait=sb set Logical_Switch_Port public2 options:qos_min_rate=6000000000 options:qos_max_rate=7000000000 options:qos_burst=8000000000 options:network_name=phys

# Let's now send ovn controller to sleep, so it will receive both ofport notification and ls deletion simultaneously
sleep_controller hv-1

# Tme to wake up ovs
wake_up_ovs hv-1

# Delete lsp1
check ovn-nbctl --wait=sb lsp-del lsp1

# And finally wake up controller
wake_up_controller hv-1

# Make sure ovn-controller is still OK
ovn-nbctl --wait=hv sync
OVS_WAIT_UNTIL([test $(as hv-1 ovs-vsctl list qos | grep -c linux-htb) -eq 1])

AT_CLEANUP
])
18 changes: 0 additions & 18 deletions tests/system-ovn.at
Original file line number Diff line number Diff line change
Expand Up @@ -10995,20 +10995,6 @@ wait_for_local_bindings() {
[kill -CONT $(cat ovn-sb/ovsdb-server.pid)]
)
}
sleep_sb() {
echo SB going to sleep
AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)])
}
wake_up_sb() {
echo SB waking up
AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)])
}
sleep_controller() {
echo Controller going to sleep
ovn-appctl debug/pause
OVS_WAIT_UNTIL([test x$(ovn-appctl -t ovn-controller debug/status) = "xpaused"])
}

stop_ovsdb_controller_updates() {
TCP_PORT=$1
echo Stopping updates from ovn-controller to ovsdb using port $TCP_PORT
Expand All @@ -11020,10 +11006,6 @@ restart_ovsdb_controller_updates() {
echo Restarting updates from ovn-controller to ovsdb
iptables -D INPUT -p tcp --destination-port $TCP_PORT -j DROP
}
wake_up_controller() {
echo Controller waking up
ovn-appctl debug/resume
}
ensure_controller_run() {
# We want to make sure controller could run at least one full loop.
# We can't use wait=hv as sb might be sleeping.
Expand Down

0 comments on commit 0794a6e

Please sign in to comment.