Skip to content

Commit

Permalink
Improved Packet Drop Statistics in OVS
Browse files Browse the repository at this point in the history
Currently OVS maintains explicit packet drop/error counters only at the port
level. Packets that are dropped as part of normal OpenFlow processing are
counted in flow stats of “drop” flows or as table misses in table stats.
These can only be interpreted by controllers that know the semantics of
the configured OpenFlow pipeline. Without that knowledge, it is impossible
for an OVS user to obtain e.g. the total number of packets dropped due to
OpenFlow rules.

Furthermore, there are numerous other reasons for which packets can be
dropped by OVS slow path that are not related to the OpenFlow pipeline.
The generated datapath flow entries include a drop action to avoid further
expensive upcalls to the slow path, but subsequent packets dropped by the
datapath are not accounted for anywhere.

Finally, the datapath itself drops packets in certain error situations.
These drops are currently not accounted for either.

This makes it difficult for OVS users to monitor packet drop in an OVS
instance and to alert a management system in case of an unexpected increase
of such drops. Also OVS trouble-shooters face difficulties in analysing
packet drops.

With this patch we implement the following changes to address the issues
mentioned above.

1. Identify and account all the silent packet drop scenarios

2. Display these drops in ovs-appctl coverage/show

This work was presented in detail at the OVS conference 2017; the
corresponding slides are available at:

https://www.slideshare.net/LF_OpenvSwitch/lfovs17troubleshooting-the-data-plane-in-ovs-82280329

Co-authored-by: Rohith Basavaraja <rohith.basavaraja@gmail.com>
Co-authored-by: Keshav Gupta <keshugupta1@gmail.com>
Signed-off-by: Anju Thomas <anju.thomas@ericsson.com>
Signed-off-by: Rohith Basavaraja <rohith.basavaraja@gmail.com>
Signed-off-by: Keshav Gupta <keshugupta1@gmail.com>
Signed-off-by: 0-day Robot <robot@bytheb.org>
  • Loading branch information
3 people authored and ovsrobot committed Feb 27, 2019
1 parent 11ff294 commit d3207ea
Show file tree
Hide file tree
Showing 19 changed files with 475 additions and 46 deletions.
1 change: 1 addition & 0 deletions datapath/linux/compat/include/linux/openvswitch.h
Expand Up @@ -946,6 +946,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_POP_NSH, /* No argument. */
OVS_ACTION_ATTR_METER, /* u32 meter number. */
OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */
OVS_ACTION_ATTR_DROP, /* Drop action. */

#ifndef __KERNEL__
OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/
Expand Down
44 changes: 43 additions & 1 deletion lib/dpif-netdev.c
Expand Up @@ -101,6 +101,17 @@ enum { MAX_METERS = 65536 }; /* Maximum number of meters. */
enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */
enum { N_METER_LOCKS = 64 }; /* Number of meter locks. */

/* Coverage counters for packets dropped by this datapath, one per drop
 * reason.  Each is bumped with COVERAGE_INC()/COVERAGE_ADD() at the point
 * where the corresponding packets are deleted or the batch is abandoned, so
 * the totals show up in "ovs-appctl coverage/show". */
COVERAGE_DEFINE(datapath_drop_meter);
COVERAGE_DEFINE(datapath_drop_upcall_error);
COVERAGE_DEFINE(datapath_drop_lock_error);
COVERAGE_DEFINE(datapath_drop_userspace_action_error);
COVERAGE_DEFINE(datapath_drop_tunnel_push_error);
COVERAGE_DEFINE(datapath_drop_tunnel_pop_error);
COVERAGE_DEFINE(datapath_drop_recirc_error);
COVERAGE_DEFINE(datapath_drop_invalid_port);
COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);

/* Protects against changes to 'dp_netdevs'. */
static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;

Expand Down Expand Up @@ -5647,6 +5658,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
band->packet_count += 1;
band->byte_count += dp_packet_size(packet);

COVERAGE_INC(datapath_drop_meter);
dp_packet_delete(packet);
} else {
/* Meter accepts packet. */
Expand Down Expand Up @@ -6402,6 +6414,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,

if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
dp_packet_delete(packet);
COVERAGE_INC(datapath_drop_rx_invalid_packet);
continue;
}

Expand Down Expand Up @@ -6528,6 +6541,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
put_actions);
if (OVS_UNLIKELY(error && error != ENOSPC)) {
dp_packet_delete(packet);
COVERAGE_INC(datapath_drop_upcall_error);
return error;
}

Expand Down Expand Up @@ -6659,6 +6673,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
if (OVS_UNLIKELY(!rules[i])) {
dp_packet_delete(packet);
COVERAGE_INC(datapath_drop_lock_error);
upcall_fail_cnt++;
}
}
Expand Down Expand Up @@ -6928,6 +6943,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
actions->data, actions->size);
} else if (should_steal) {
dp_packet_delete(packet);
COVERAGE_INC(datapath_drop_userspace_action_error);
}
}

Expand All @@ -6942,6 +6958,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
struct dp_netdev *dp = pmd->dp;
int type = nl_attr_type(a);
struct tx_port *p;
uint32_t packet_count, packet_dropped;

switch ((enum ovs_action_attr)type) {
case OVS_ACTION_ATTR_OUTPUT:
Expand Down Expand Up @@ -6983,6 +7000,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
dp_packet_batch_add(&p->output_pkts, packet);
}
return;
} else {
COVERAGE_ADD(datapath_drop_invalid_port,
dp_packet_batch_size(packets_));
}
break;

Expand All @@ -6992,10 +7012,16 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
* the ownership of these packets. Thus, we can avoid performing
* the action, because the caller will not use the result anyway.
* Just break to free the batch. */
COVERAGE_ADD(datapath_drop_tunnel_push_error,
dp_packet_batch_size(packets_));
break;
}
dp_packet_batch_apply_cutlen(packets_);
push_tnl_action(pmd, a, packets_);
packet_count = dp_packet_batch_size(packets_);
if (push_tnl_action(pmd, a, packets_)) {
COVERAGE_ADD(datapath_drop_tunnel_push_error,
packet_count);
}
return;

case OVS_ACTION_ATTR_TUNNEL_POP:
Expand All @@ -7015,7 +7041,13 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,

dp_packet_batch_apply_cutlen(packets_);

packet_count = packets_->count;
netdev_pop_header(p->port->netdev, packets_);
packet_dropped = packet_count - packets_->count;
if (packet_dropped) {
COVERAGE_ADD(datapath_drop_tunnel_pop_error,
packet_dropped);
}
if (dp_packet_batch_is_empty(packets_)) {
return;
}
Expand All @@ -7030,6 +7062,11 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
(*depth)--;
return;
}
COVERAGE_ADD(datapath_drop_invalid_tnl_port,
dp_packet_batch_size(packets_));
} else {
COVERAGE_ADD(datapath_drop_recirc_error,
dp_packet_batch_size(packets_));
}
break;

Expand Down Expand Up @@ -7074,6 +7111,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,

return;
}
COVERAGE_ADD(datapath_drop_lock_error,
dp_packet_batch_size(packets_));
break;

case OVS_ACTION_ATTR_RECIRC:
Expand All @@ -7097,6 +7136,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
return;
}

COVERAGE_ADD(datapath_drop_recirc_error,
dp_packet_batch_size(packets_));
VLOG_WARN("Packet dropped. Max recirculation depth exceeded.");
break;

Expand Down Expand Up @@ -7249,6 +7290,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
case OVS_ACTION_ATTR_PUSH_NSH:
case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_DROP:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
}
Expand Down
7 changes: 7 additions & 0 deletions lib/dpif.c
Expand Up @@ -1269,6 +1269,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_UNSPEC:
case OVS_ACTION_ATTR_DROP:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
}
Expand Down Expand Up @@ -1874,6 +1875,12 @@ dpif_supports_tnl_push_pop(const struct dpif *dpif)
return dpif_is_netdev(dpif);
}

/* Reports whether 'dpif' supports the explicit drop action.  The answer is
 * exactly what dpif_is_netdev() returns for 'dpif'. */
bool
dpif_supports_explicit_drop_action(const struct dpif *dpif)
{
    bool is_netdev = dpif_is_netdev(dpif);

    return is_netdev;
}

/* Meters */
void
dpif_meter_get_features(const struct dpif *dpif,
Expand Down
3 changes: 3 additions & 0 deletions lib/dpif.h
Expand Up @@ -888,6 +888,9 @@ int dpif_get_pmds_for_port(const struct dpif * dpif, odp_port_t port_no,

char *dpif_get_dp_version(const struct dpif *);
bool dpif_supports_tnl_push_pop(const struct dpif *);
/* Returns true if 'dpif' supports the explicit drop action; the definition
 * in dpif.c returns dpif_is_netdev() for 'dpif'. */
bool dpif_supports_explicit_drop_action(const struct dpif *);
/* NOTE(review): no definition of this function is visible alongside this
 * declaration -- confirm where it is implemented and what 'detail' and
 * 'reply' are expected to carry. */
int dpif_show_drop_stats_support(struct dpif *dpif, bool detail,
struct ds *reply);

/* Log functions. */
struct vlog_module;
Expand Down
81 changes: 80 additions & 1 deletion lib/odp-execute.c
Expand Up @@ -25,6 +25,7 @@
#include <stdlib.h>
#include <string.h>

#include "coverage.h"
#include "dp-packet.h"
#include "dpif.h"
#include "netlink.h"
Expand All @@ -36,6 +37,74 @@
#include "util.h"
#include "csum.h"
#include "conntrack.h"
#include "ofproto/ofproto-dpif-xlate.h"
#include "openvswitch/vlog.h"

VLOG_DEFINE_THIS_MODULE(odp_execute);

/* Packets deleted while executing the sample and pop_nsh actions in this
 * file. */
COVERAGE_DEFINE(dp_sample_error_drop);
COVERAGE_DEFINE(dp_nsh_decap_error_drop);

/* One counter per 'enum xlate_error' reason that can be carried by an
 * OVS_ACTION_ATTR_DROP action; see dp_update_drop_action_counter() for the
 * reason-to-counter mapping. */
COVERAGE_DEFINE(drop_action_of_pipeline);
COVERAGE_DEFINE(drop_action_bridge_not_found);
COVERAGE_DEFINE(drop_action_recursion_too_deep);
COVERAGE_DEFINE(drop_action_too_many_resubmit);
COVERAGE_DEFINE(drop_action_stack_too_deep);
COVERAGE_DEFINE(drop_action_no_recirculation_context);
COVERAGE_DEFINE(drop_action_recirculation_conflict);
COVERAGE_DEFINE(drop_action_too_many_mpls_labels);
COVERAGE_DEFINE(drop_action_invalid_tunnel_metadata);
COVERAGE_DEFINE(drop_action_unsupported_packet_type);
COVERAGE_DEFINE(drop_action_congestion);
COVERAGE_DEFINE(drop_action_forwarding_disabled);

/* Adds 'delta' to the coverage counter that corresponds to 'drop_reason'.
 *
 * Every recognized reason is accounted and the function returns right away.
 * XLATE_MAX, or any value without a counter, falls out of the switch and is
 * reported through a rate-limited error log instead. */
static void
dp_update_drop_action_counter(enum xlate_error drop_reason,
                              int delta)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

    switch (drop_reason) {
    case XLATE_OK:
        COVERAGE_ADD(drop_action_of_pipeline, delta);
        return;

    case XLATE_BRIDGE_NOT_FOUND:
        COVERAGE_ADD(drop_action_bridge_not_found, delta);
        return;

    case XLATE_RECURSION_TOO_DEEP:
        COVERAGE_ADD(drop_action_recursion_too_deep, delta);
        return;

    case XLATE_TOO_MANY_RESUBMITS:
        COVERAGE_ADD(drop_action_too_many_resubmit, delta);
        return;

    case XLATE_STACK_TOO_DEEP:
        COVERAGE_ADD(drop_action_stack_too_deep, delta);
        return;

    case XLATE_NO_RECIRCULATION_CONTEXT:
        COVERAGE_ADD(drop_action_no_recirculation_context, delta);
        return;

    case XLATE_RECIRCULATION_CONFLICT:
        COVERAGE_ADD(drop_action_recirculation_conflict, delta);
        return;

    case XLATE_TOO_MANY_MPLS_LABELS:
        COVERAGE_ADD(drop_action_too_many_mpls_labels, delta);
        return;

    case XLATE_INVALID_TUNNEL_METADATA:
        COVERAGE_ADD(drop_action_invalid_tunnel_metadata, delta);
        return;

    case XLATE_UNSUPPORTED_PACKET_TYPE:
        COVERAGE_ADD(drop_action_unsupported_packet_type, delta);
        return;

    case XLATE_CONGESTION_DROP:
        COVERAGE_ADD(drop_action_congestion, delta);
        return;

    case XLATE_FORWARDING_DISABLED:
        COVERAGE_ADD(drop_action_forwarding_disabled, delta);
        return;

    case XLATE_MAX:
    default:
        /* Not a countable reason; reported below. */
        break;
    }

    VLOG_ERR_RL(&rl, "Invalid Drop reason type:%d", drop_reason);
}


/* Masked copy of an ethernet address. 'src' is already properly masked. */
static void
Expand Down Expand Up @@ -621,6 +690,7 @@ odp_execute_sample(void *dp, struct dp_packet *packet, bool steal,
case OVS_SAMPLE_ATTR_PROBABILITY:
if (random_uint32() >= nl_attr_get_u32(a)) {
if (steal) {
COVERAGE_ADD(dp_sample_error_drop, 1);
dp_packet_delete(packet);
}
return;
Expand Down Expand Up @@ -705,6 +775,7 @@ requires_datapath_assistance(const struct nlattr *a)
case OVS_ACTION_ATTR_PUSH_NSH:
case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_DROP:
return false;

case OVS_ACTION_ATTR_UNSPEC:
Expand Down Expand Up @@ -921,6 +992,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
if (pop_nsh(packet)) {
dp_packet_batch_refill(batch, packet, i);
} else {
COVERAGE_INC(dp_nsh_decap_error_drop);
dp_packet_delete(packet);
}
}
Expand All @@ -931,7 +1003,14 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
conntrack_clear(packet);
}
break;

case OVS_ACTION_ATTR_DROP: {
const enum xlate_error *drop_reason = nl_attr_get(a);
if (*drop_reason < XLATE_MAX) {
dp_update_drop_action_counter(*drop_reason, batch->count);
}
dp_packet_delete_batch(batch, steal);
return;
}
case OVS_ACTION_ATTR_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
Expand Down
9 changes: 9 additions & 0 deletions lib/odp-util.c
Expand Up @@ -43,6 +43,7 @@
#include "uuid.h"
#include "openvswitch/vlog.h"
#include "openvswitch/match.h"
#include "ofproto/ofproto-dpif-xlate.h"

VLOG_DEFINE_THIS_MODULE(odp_util);

Expand Down Expand Up @@ -131,6 +132,7 @@ odp_action_len(uint16_t type)
case OVS_ACTION_ATTR_CLONE: return ATTR_LEN_VARIABLE;
case OVS_ACTION_ATTR_PUSH_NSH: return ATTR_LEN_VARIABLE;
case OVS_ACTION_ATTR_POP_NSH: return 0;
case OVS_ACTION_ATTR_DROP: return sizeof(enum xlate_error);

case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
Expand Down Expand Up @@ -1182,6 +1184,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a,
case OVS_ACTION_ATTR_POP_NSH:
ds_put_cstr(ds, "pop_nsh()");
break;
case OVS_ACTION_ATTR_DROP:
ds_put_cstr(ds, "drop");
break;
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
default:
Expand Down Expand Up @@ -2432,8 +2437,12 @@ odp_actions_from_string(const char *s, const struct simap *port_names,
struct ofpbuf *actions)
{
size_t old_size;
enum xlate_error drop_action;

if (!strcasecmp(s, "drop")) {
drop_action = XLATE_OK;
nl_msg_put_unspec(actions, OVS_ACTION_ATTR_DROP,
&drop_action, sizeof drop_action);
return 0;
}

Expand Down
1 change: 1 addition & 0 deletions ofproto/ofproto-dpif-ipfix.c
Expand Up @@ -3015,6 +3015,7 @@ dpif_ipfix_read_actions(const struct flow *flow,
case OVS_ACTION_ATTR_PUSH_NSH:
case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_UNSPEC:
case OVS_ACTION_ATTR_DROP:
case __OVS_ACTION_ATTR_MAX:
default:
break;
Expand Down
1 change: 1 addition & 0 deletions ofproto/ofproto-dpif-sflow.c
Expand Up @@ -1223,6 +1223,7 @@ dpif_sflow_read_actions(const struct flow *flow,
case OVS_ACTION_ATTR_PUSH_NSH:
case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_UNSPEC:
case OVS_ACTION_ATTR_DROP:
case __OVS_ACTION_ATTR_MAX:
default:
break;
Expand Down

0 comments on commit d3207ea

Please sign in to comment.