From 27501802d09f782b8133031c1eae3394ae5ce147 Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 4 Oct 2019 13:48:58 -0700 Subject: [PATCH] ofproto-dpif: Expose datapath capability to ovsdb. The patch adds support for fetching the datapath's capabilities from the result of 'check_support()', and write the supported capability to a new database column, called 'capabilities' under Datapath table. To see how it works, run: # ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev # ovs-vsctl -- --id=@m create Datapath datapath_version=0 \ 'ct_zones={}' 'capabilities={}' \ -- set Open_vSwitch . datapaths:"netdev"=@m # ovs-vsctl list-dp-cap netdev ufid=true sample_nesting=true clone=true tnl_push_pop=true \ ct_orig_tuple=true ct_eventmask=true ct_state=true \ ct_clear=true max_vlan_headers=1 recirc=true ct_label=true \ max_hash_alg=1 ct_state_nat=true ct_timeout=true \ ct_mark=true ct_orig_tuple6=true check_pkt_len=true \ masked_set_action=true max_mpls_depth=3 trunc=true ct_zone=true Signed-off-by: William Tu Tested-by: Greg Rose Reviewed-by: Greg Rose --- v5: Add improved documentation from Ben and fix checkpatch error (tab and line 79 char) v4: rebase to master v3: fix 32-bit build, reported by Greg travis: https://travis-ci.org/williamtu/ovs-travis/builds/599276267 v2: rebase to master --- lib/meta-flow.xml | 14 ++- ofproto/ofproto-dpif.c | 51 ++++++++ ofproto/ofproto-provider.h | 2 + ofproto/ofproto.c | 12 ++ ofproto/ofproto.h | 2 + tests/ovs-vsctl.at | 10 ++ utilities/ovs-vsctl.8.in | 6 + utilities/ovs-vsctl.c | 28 +++++ vswitchd/bridge.c | 21 ++++ vswitchd/vswitch.ovsschema | 5 +- vswitchd/vswitch.xml | 232 +++++++++++++++++++++++++++++++++++++ 11 files changed, 379 insertions(+), 4 deletions(-) diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml index d8763eb0b71..90b405c7375 100644 --- a/lib/meta-flow.xml +++ b/lib/meta-flow.xml @@ -2488,7 +2488,7 @@ actions=clone(load:0->NXM_OF_IN_PORT[],output:123)

- Open vSwitch 2.5 and later support ``connection tracking,'' which allows + Open vSwitch supports ``connection tracking,'' which allows bidirectional streams of packets to be statefully grouped into connections. Open vSwitch connection tracking, for example, identifies the patterns of TCP packets that indicates a successfully initiated @@ -2524,7 +2524,14 @@ actions=clone(load:0->NXM_OF_IN_PORT[],output:123)

- Connection tracking is an Open vSwitch extension to OpenFlow. + Connection tracking is an Open vSwitch extension to OpenFlow. Open + vSwitch 2.5 added the initial support for connection tracking. + Subsequent versions of Open vSwitch added many refinements and extensions + to the initial support. Many of these capabilities depend on the Open + vSwitch datapath rather than simply the userspace version. The + capabilities column in the Datapath table (see + ovs-vswitchd.conf.db(5)) reports the detailed capabilities + of a particular Open vSwitch datapath.

@@ -2713,7 +2720,8 @@ actions=clone(load:0->NXM_OF_IN_PORT[],output:123)

- The following fields are populated by the ct action, and require a + The following fields are populated by the ct + action, and require a match to a valid connection tracking state as a prerequisite, in addition to the IP or IPv6 ethertype match. Examples of valid connection tracking state matches include ct_state=+new, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index c35ec3e6159..fa73d06a82f 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -5444,6 +5444,56 @@ ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone_id) } } +static void +get_datapath_cap(const char *datapath_type, struct smap *cap) +{ + char *str_value; + struct odp_support odp; + struct dpif_backer_support s; + struct dpif_backer *backer = shash_find_data(&all_dpif_backers, + datapath_type); + if (!backer) { + return; + } + s = backer->rt_support; + odp = s.odp; + + /* ODP_SUPPORT_FIELDS */ + str_value = xasprintf("%"PRIuSIZE, odp.max_vlan_headers); + smap_add(cap, "max_vlan_headers", str_value); + free(str_value); + + str_value = xasprintf("%"PRIuSIZE, odp.max_mpls_depth); + smap_add(cap, "max_mpls_depth", str_value); + free(str_value); + + smap_add(cap, "recirc", odp.recirc ? "true" : "false"); + smap_add(cap, "ct_state", odp.ct_state ? "true" : "false"); + smap_add(cap, "ct_zone", odp.ct_zone ? "true" : "false"); + smap_add(cap, "ct_mark", odp.ct_mark ? "true" : "false"); + smap_add(cap, "ct_label", odp.ct_label ? "true" : "false"); + smap_add(cap, "ct_state_nat", odp.ct_state_nat ? "true" : "false"); + smap_add(cap, "ct_orig_tuple", odp.ct_orig_tuple ? "true" : "false"); + smap_add(cap, "ct_orig_tuple6", odp.ct_orig_tuple6 ? "true" : "false"); + + /* DPIF_SUPPORT_FIELDS */ + smap_add(cap, "masked_set_action", s.masked_set_action ? "true" : "false"); + smap_add(cap, "tnl_push_pop", s.tnl_push_pop ? "true" : "false"); + smap_add(cap, "ufid", s.ufid ? "true" : "false"); + smap_add(cap, "trunc", s.trunc ? 
"true" : "false"); + smap_add(cap, "clone", s.clone ? "true" : "false"); + smap_add(cap, "sample_nesting", s.sample_nesting ? "true" : "false"); + smap_add(cap, "ct_eventmask", s.ct_eventmask ? "true" : "false"); + smap_add(cap, "ct_clear", s.ct_clear ? "true" : "false"); + + str_value = xasprintf("%"PRIuSIZE, s.max_hash_alg); + smap_add(cap, "max_hash_alg", str_value); + free(str_value); + + smap_add(cap, "check_pkt_len", s.check_pkt_len ? "true" : "false"); + smap_add(cap, "ct_timeout", s.ct_timeout ? "true" : "false"); +} + /* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and * 'nw_proto'. Returns true if the zone-based timeout policy is configured. * On success, stores the timeout policy name in 'tp_name', and sets @@ -6585,4 +6635,5 @@ const struct ofproto_class ofproto_dpif_class = { ct_flush, /* ct_flush */ ct_set_zone_timeout_policy, ct_del_zone_timeout_policy, + get_datapath_cap, }; diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index c980e6bffff..80c4fee06d0 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -1889,6 +1889,8 @@ struct ofproto_class { /* Deletes the timeout policy associated with 'zone' in datapath type * 'dp_type'. */ void (*ct_del_zone_timeout_policy)(const char *dp_type, uint16_t zone); + /* Get the datapath's capabilities. */ + void (*get_datapath_cap)(const char *dp_type, struct smap *caps); }; extern const struct ofproto_class ofproto_dpif_class; diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 3aaa45a9b3a..7535ba176b7 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -954,6 +954,18 @@ ofproto_get_flow_restore_wait(void) return flow_restore_wait; } +/* Retrieve datapath capabilities. 
*/ +void +ofproto_get_datapath_cap(const char *datapath_type, struct smap *dp_cap) +{ + datapath_type = ofproto_normalize_type(datapath_type); + const struct ofproto_class *class = ofproto_class_find__(datapath_type); + + if (class->get_datapath_cap) { + class->get_datapath_cap(datapath_type, dp_cap); + } +} + /* Connection tracking configuration. */ void ofproto_ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id, diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 033c4cf93e9..48a9d602c21 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -372,6 +372,8 @@ void ofproto_ct_set_zone_timeout_policy(const char *datapath_type, struct simap *timeout_policy); void ofproto_ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone); +void ofproto_get_datapath_cap(const char *datapath_type, + struct smap *dp_cap); /* Configuration of ports. */ void ofproto_port_unregister(struct ofproto *, ofp_port_t ofp_port); diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at index 4d55f89e4ae..18b9cd7c89a 100644 --- a/tests/ovs-vsctl.at +++ b/tests/ovs-vsctl.at @@ -819,6 +819,10 @@ AT_CHECK([RUN_OVS_VSCTL([del-zone-tp netdev zone=1])]) AT_CHECK([RUN_OVS_VSCTL([--if-exists del-zone-tp netdev zone=1])]) AT_CHECK([RUN_OVS_VSCTL([list-zone-tp netdev])], [0], [Zone:2, Timeout Policies: icmp_first=2 icmp_reply=3 ]) + +AT_CHECK([RUN_OVS_VSCTL([-- --id=@m create Datapath datapath_version=0 'capabilities={recirc=true}' -- set Open_vSwitch . datapaths:"system"=@m])], [0], [stdout]) +AT_CHECK([RUN_OVS_VSCTL([list-dp-cap system])], [0], [recirc=true +]) OVS_VSCTL_CLEANUP AT_CLEANUP @@ -962,6 +966,12 @@ AT_CHECK([RUN_OVS_VSCTL([del-zone-tp netdev zone=11])], ]) AT_CHECK([RUN_OVS_VSCTL([list-zone-tp netdev])], [0], [Zone:2, Timeout Policies: icmp_first=2 icmp_reply=3 ]) + +AT_CHECK([RUN_OVS_VSCTL([-- --id=@m create Datapath datapath_version=0 'capabilities={recirc=true}' -- set Open_vSwitch . 
datapaths:"system"=@m])], [0], [stdout]) +AT_CHECK([RUN_OVS_VSCTL([list-dp-cap nosystem])], + [1], [], [ovs-vsctl: datapath "nosystem" record not found +]) + OVS_VSCTL_CLEANUP AT_CLEANUP diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in index 14a8aa4a48a..ff97922dd0c 100644 --- a/utilities/ovs-vsctl.8.in +++ b/utilities/ovs-vsctl.8.in @@ -379,6 +379,12 @@ delete a zone that does not exist has no effect. .IP "\fBlist\-zone\-tp \fIdatapath\fR" Prints the timeout policies of all zones in \fIdatapath\fR. . +.SS "Datapath Capabilities Command" +The command query datapath capabilities. +. +.IP "\fBlist\-dp\-cap \fIdatapath\fR" +Prints the datapath's capabilities. +. .SS "OpenFlow Controller Connectivity" . \fBovs\-vswitchd\fR can perform all configured bridging and switching diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c index 7232471e68b..bd3972636e6 100644 --- a/utilities/ovs-vsctl.c +++ b/utilities/ovs-vsctl.c @@ -1362,6 +1362,31 @@ pre_get_zone(struct ctl_context *ctx) ovsdb_idl_add_column(ctx->idl, &ovsrec_ct_timeout_policy_col_timeouts); } +static void +pre_get_dp_cap(struct ctl_context *ctx) +{ + ovsdb_idl_add_column(ctx->idl, &ovsrec_open_vswitch_col_datapaths); + ovsdb_idl_add_column(ctx->idl, &ovsrec_datapath_col_capabilities); +} + +static void +cmd_list_dp_cap(struct ctl_context *ctx) +{ + struct vsctl_context *vsctl_ctx = vsctl_context_cast(ctx); + struct smap_node *node; + + struct ovsrec_datapath *dp = find_datapath(vsctl_ctx, ctx->argv[1]); + if (!dp) { + ctl_fatal("datapath \"%s\" record not found", ctx->argv[1]); + } + + SMAP_FOR_EACH (node, &dp->capabilities) { + ds_put_format(&ctx->output, "%s=%s ",node->key, node->value); + } + ds_chomp(&ctx->output, ' '); + ds_put_char(&ctx->output, '\n'); +} + static void cmd_add_br(struct ctl_context *ctx) { @@ -3112,6 +3137,9 @@ static const struct ctl_command_syntax vsctl_commands[] = { "--if-exists", RW}, {"list-zone-tp", 1, 1, "", pre_get_zone, cmd_list_zone_tp, NULL, "", RO}, + /* 
Datapath capabilities. */ + {"list-dp-cap", 1, 1, "", pre_get_dp_cap, cmd_list_dp_cap, NULL, "", RO}, + {NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, RO}, }; diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 9095ebf5d5e..d402e39bdc6 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -171,6 +171,7 @@ struct datapath { struct hmap ct_zones; /* Map of 'struct ct_zone' elements, indexed * by 'zone'. */ struct hmap_node node; /* Node in 'all_datapaths' hmap. */ + struct smap caps; /* Capabilities. */ unsigned int last_used; /* The last idl_seqno that this 'datapath' * used in OVSDB. This number is used for * garbage collection. */ @@ -700,6 +701,7 @@ datapath_create(const char *type) dp->type = xstrdup(type); hmap_init(&dp->ct_zones); hmap_insert(&all_datapaths, &dp->node, hash_string(type, 0)); + smap_init(&dp->caps); return dp; } @@ -716,6 +718,7 @@ datapath_destroy(struct datapath *dp) hmap_remove(&all_datapaths, &dp->node); hmap_destroy(&dp->ct_zones); free(dp->type); + smap_destroy(&dp->caps); free(dp); } } @@ -758,6 +761,23 @@ ct_zones_reconfigure(struct datapath *dp, struct ovsrec_datapath *dp_cfg) } } +static void +dp_capability_reconfigure(struct datapath *dp, + struct ovsrec_datapath *dp_cfg) +{ + struct smap_node *node; + struct smap cap; + + smap_init(&cap); + ofproto_get_datapath_cap(dp->type, &cap); + + SMAP_FOR_EACH (node, &cap) { + ovsrec_datapath_update_capabilities_setkey(dp_cfg, node->key, + node->value); + } + smap_destroy(&cap); +} + static void datapath_reconfigure(const struct ovsrec_open_vswitch *cfg) { @@ -771,6 +791,7 @@ datapath_reconfigure(const struct ovsrec_open_vswitch *cfg) dp = datapath_lookup(dp_name); if (!dp) { dp = datapath_create(dp_name); + dp_capability_reconfigure(dp, dp_cfg); } dp->last_used = idl_seqno; ct_zones_reconfigure(dp, dp_cfg); diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema index 02be5ddeec9..0666c8c7644 100644 --- a/vswitchd/vswitch.ovsschema +++ b/vswitchd/vswitch.ovsschema @@ -1,6 
+1,6 @@ {"name": "Open_vSwitch", "version": "8.2.0", - "cksum": "4076590391 26298", + "cksum": "1076640191 26427", "tables": { "Open_vSwitch": { "columns": { @@ -650,6 +650,9 @@ "value": {"type": "uuid", "refTable": "CT_Zone"}, "min": 0, "max": "unlimited"}}, + "capabilities": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}}, diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 02a68deb146..d4cc3b9ed5c 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -5682,6 +5682,238 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ connection tracking-related OpenFlow matches and actions). + +

+ The capabilities column reports a datapath's + features. For the netdev datapath, the + capabilities are fixed for a given version of Open vSwitch + because this datapath is built into the + ovs-vswitchd binary. The Linux kernel and + Windows and other datapaths, which are external to OVS + userspace, can vary in version and capabilities independently + from ovs-vswitchd. +

+ +

+ Some of these features indicate whether higher-level Open vSwitch + features are available. For example, OpenFlow features for + connection-tracking are available only when the ct_state capability is true. A controller that wishes to + determine whether a feature is supported could, therefore, consult the + relevant capabilities in this table. However, as a general rule, it is + better for a controller to try to use the higher-level feature and use + the result as an indication of support, since the low-level + capabilities are more likely to shift over time than the high-level + features that rely on them. +

+ + + Number of 802.1q VLAN headers supported by the datapath, as probed by + the ovs-vswitchd slow path. If the datapath supports more + VLAN headers than the slow path, this reports the slow path's limit. + The value of in the table does not influence the number reported + here. + + + If this is true, then the datapath supports recirculation, + specifically OVS_KEY_ATTR_RECIRC_ID. Recirculation enables + higher performance for MPLS and active-active load balancing + bonding modes. + + +

+ These capabilities are granular because Open vSwitch and its + datapaths added support for connection tracking over several + releases, with features added individually over that time. +

+ + +

+ If true, datapath supports OVS_KEY_ATTR_CT_STATE, which indicates + support for the bits in the OpenFlow ct_state field + (see ovs-fields(7)) other than snat and + dnat, which have a separate capability. +

+ +

+ If this is false, the datapath does not support connection-tracking + at all and the remaining connection-tracking capabilities should + all be false. In this case, Open vSwitch will reject flows that + match on the ct_state field or use the ct + action. +

+
+ +

+ If true, it means that the datapath supports the snat + and dnat flags in the OpenFlow ct_state + field. The ct_state capability must be true for this + to make sense. +

+ +

+ If false, Open vSwitch will reject flows that match on the + snat or dnat bits in + ct_state or use nat in the + ct action. +

+
+ + If true, datapath supports OVS_KEY_ATTR_CT_ZONE. If false, Open + vSwitch rejects flows that match on the ct_zone field or + that specify a nonzero zone or a zone field on the ct + action. + + + If true, datapath supports OVS_KEY_ATTR_CT_MARK. If false, Open + vSwitch rejects flows that match on the ct_mark field or + that set ct_mark in the ct action. + + + If true, datapath supports OVS_KEY_ATTR_CT_LABEL. If false, Open + vSwitch rejects flows that match on the ct_label field + or that set ct_label in the ct action. + + +

+ If true, the datapath supports matching the 5-tuple from the + connection's original direction for IPv4 traffic. If false, Open + vSwitch rejects flows that match on ct_nw_src or + ct_nw_dst, that use the ct feature of the + resubmit action, or the force keyword in + the ct action. (The latter isn't tied to connection + tracking support of original tuples in any technical way. They are + conflated because all current datapaths implemented the two + features at the same time.) +

+ +

+ If this and ct_orig_tuple6 are + both false, Open vSwitch rejects flows that match on + ct_nw_proto, ct_tp_src, or + ct_tp_dst. +

+
+ + If true, the datapath supports matching the 5-tuple from the + connection's original direction for IPv6 traffic. If false, Open + vSwitch rejects flows that match on ct_ipv6_src or + ct_ipv6_dst. + +
+ + True if the datapath supports masked data in OVS_ACTION_ATTR_SET + actions. Masked data can improve performance by allowing megaflows to + match on fewer fields. + + + True if the datapath supports tnl_push and pop actions. This is a + prerequisite for a datapath to support native tunneling. + + + True if the datapath supports OVS_FLOW_ATTR_UFID. UFID support + improves revalidation performance by transferring less data between + the slow path and the datapath. + + + True if the datapath supports OVS_ACTION_ATTR_TRUNC action. If false, + the output action with packet truncation requires every + packet to be sent to the Open vSwitch slow path, which is likely to + make it too slow for mirroring traffic in bulk. + + +

+ When Open vSwitch translates actions from OpenFlow into the datapath + representation, some of the datapath actions may modify the packet or + have other side effects that later datapath actions can't undo. The + OpenFlow ct, meter, output + with truncation, encap, decap, and + dec_nsh_ttl actions fall into this category. Often, + this is not a problem because nothing later on needs the original + packet. +

+ +

+ Such actions can, however, occur in circumstances where the + translation does require the original packet. For example, an + OpenFlow output action might direct a packet to a patch + port, which might in turn lead to a ct action that NATs + the packet (which cannot be undone), and then afterward when control + flow pops back across the patch port some other action might need to + act on the original packet. +

+ +

+ Open vSwitch has two different ways to implement this ``save and + restore'' via datapath actions. These capabilities indicate which + one Open vSwitch will choose. When neither is available, Open + vSwitch simply fails in situations that require this feature. +

+ + +

+ True if the datapath supports OVS_ACTION_ATTR_CLONE action. This + is the preferred option for saving and restoring packets, since it + is intended for the purpose, but old datapaths do not support it. + Open vSwitch will use it whenever it is available. +

+ +

+ (The OpenFlow clone action does not always yield an + OVS_ACTION_ATTR_CLONE action. It only does so when the datapath + supports it and the clone brackets actions that + otherwise cannot be undone.) +

+
+ + Maximum level of nesting allowed by OVS_ACTION_ATTR_SAMPLE action. + Open vSwitch misuses this action for saving and restoring packets + when the datapath supports more than 3 levels of nesting and + OVS_ACTION_ATTR_CLONE is not available. + +
+ + True if the datapath's OVS_ACTION_ATTR_CT action implements the + OVS_CT_ATTR_EVENTMASK attribute. When this is true, Open vSwitch uses + the event mask feature to limit the kinds of events reported to + conntrack update listeners. When Open vSwitch doesn't limit the event + mask, listeners receive reports of numerous usually unimportant events, + such as TCP state machine changes, which can waste CPU time. + + True if the datapath supports OVS_ACTION_ATTR_CT_CLEAR action. + If false, the OpenFlow ct_clear action has no effect + on the datapath. + + Highest supported dp_hash algorithm. This allows Open vSwitch to avoid + requesting a packet hash that the datapath does not support. + + True if the datapath supports OVS_ACTION_ATTR_CHECK_PKT_LEN. If false, + Open vSwitch implements the check_pkt_larger action by + sending every packet through the Open vSwitch slow path, which is + likely to make it too slow for handling traffic in bulk. + + True if the datapath supports OVS_CT_ATTR_TIMEOUT in the + OVS_ACTION_ATTR_CT action. If false, Open vSwitch cannot implement + timeout policies based on connection tracking zones, as configured + through the CT_Timeout_Policy table. + 
+ The overall purpose of these columns is described under Common Columns at the beginning of this document.