Skip to content

Commit

Permalink
dpif-netdev: Translate Geneve options per-flow, not per-packet.
Browse files Browse the repository at this point in the history
The kernel implementation of Geneve options stores the TLV option
data in the flow exactly as received, without any further parsing.
This is then translated to known options for the purposes of matching
on flow setup (which will then install a datapath flow in the form
the kernel is expecting).

The userspace implementation behaves a little bit differently - it
looks up known options as each packet is received. The reason for this
is that there is a much tighter coupling between datapath and flow
translation, and the representation is generally expected to be the same.
This works, but it incurs work on a per-packet basis that could be done
per-flow instead.

This introduces a small translation step for Geneve packets between
datapath and flow lookup for the userspace datapath in order to
allow the same kind of processing that the kernel does.

There is a second benefit to this as well: for some operations it is
preferable to keep the options exactly as they were received on the wire,
which this enables. One example is that for packets that are executed from
ofproto-dpif-upcall to the datapath, this avoids the translation of
Geneve metadata. Since this conversion is potentially lossy (for unknown
options), keeping everything in the same format removes the possibility
of dropping options if the packet comes back up to userspace and the
Geneve option translation table has changed. To help with these types of
operations, most functions can understand both formats of data and seamlessly
do the right thing.

Signed-off-by: Jesse Gross <jesse@nicira.com>
  • Loading branch information
jessegross committed Jul 30, 2015
1 parent 3de9dde commit 18162da
Show file tree
Hide file tree
Showing 16 changed files with 534 additions and 223 deletions.
1 change: 1 addition & 0 deletions lib/automake.mk
Expand Up @@ -81,6 +81,7 @@ lib_libopenvswitch_la_SOURCES = \
lib/fatal-signal.h \
lib/flow.c \
lib/flow.h \
lib/geneve.h \
lib/guarded-list.c \
lib/guarded-list.h \
lib/hash.c \
Expand Down
55 changes: 50 additions & 5 deletions lib/dpif-netdev.c
Expand Up @@ -1884,8 +1884,8 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
if (mask_key_len) {
enum odp_key_fitness fitness;

fitness = odp_flow_key_to_mask(mask_key, mask_key_len, key, key_len,
&wc->masks, flow);
fitness = odp_flow_key_to_mask_udpif(mask_key, mask_key_len, key,
key_len, &wc->masks, flow);
if (fitness) {
/* This should not happen: it indicates that
* odp_flow_key_from_mask() and odp_flow_key_to_mask()
Expand Down Expand Up @@ -1919,7 +1919,7 @@ dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
{
odp_port_t in_port;

if (odp_flow_key_to_flow(key, key_len, flow)) {
if (odp_flow_key_to_flow_udpif(key, key_len, flow)) {
/* This should not happen: it indicates that odp_flow_key_from_flow()
* and odp_flow_key_to_flow() disagree on the acceptable form of a
* flow. Log the problem as an error, with enough details to enable
Expand Down Expand Up @@ -3014,11 +3014,25 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
struct ofpbuf *actions, struct ofpbuf *put_actions)
{
struct dp_netdev *dp = pmd->dp;
struct flow_tnl orig_tunnel;
int err;

if (OVS_UNLIKELY(!dp->upcall_cb)) {
return ENODEV;
}

orig_tunnel.flags = flow->tunnel.flags;
if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) {
orig_tunnel.metadata.present.len = flow->tunnel.metadata.present.len;
memcpy(orig_tunnel.metadata.opts.gnv, flow->tunnel.metadata.opts.gnv,
flow->tunnel.metadata.present.len);
err = tun_metadata_from_geneve_udpif(&orig_tunnel, &orig_tunnel,
&flow->tunnel);
if (err) {
return err;
}
}

if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) {
struct ds ds = DS_EMPTY_INITIALIZER;
char *packet_str;
Expand Down Expand Up @@ -3046,8 +3060,39 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
ds_destroy(&ds);
}

return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
actions, wc, put_actions, dp->upcall_aux);
err = dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
actions, wc, put_actions, dp->upcall_aux);
if (err && err != ENOSPC) {
return err;
}

if (wc) {
if (wc->masks.tunnel.metadata.present.map) {
struct geneve_opt opts[GENEVE_TOT_OPT_SIZE /
sizeof(struct geneve_opt)];

tun_metadata_to_geneve_udpif_mask(&flow->tunnel,
&wc->masks.tunnel,
orig_tunnel.metadata.opts.gnv,
orig_tunnel.metadata.present.len,
opts);

memset(&wc->masks.tunnel.metadata, 0,
sizeof wc->masks.tunnel.metadata);
memcpy(&wc->masks.tunnel.metadata.opts.gnv, opts,
orig_tunnel.metadata.present.len);
}
wc->masks.tunnel.metadata.present.len = 0xff;
}

if (orig_tunnel.flags & FLOW_TNL_F_UDPIF) {
memcpy(&flow->tunnel.metadata.opts.gnv, orig_tunnel.metadata.opts.gnv,
orig_tunnel.metadata.present.len);
flow->tunnel.metadata.present.len = orig_tunnel.metadata.present.len;
flow->tunnel.flags |= FLOW_TNL_F_UDPIF;
}

return err;
}

static inline uint32_t
Expand Down
48 changes: 38 additions & 10 deletions lib/flow.c
Expand Up @@ -462,9 +462,22 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
miniflow_push_words(mf, tunnel, &md->tunnel,
offsetof(struct flow_tnl, metadata) /
sizeof(uint64_t));
if (md->tunnel.metadata.opt_map) {
miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
sizeof md->tunnel.metadata / sizeof(uint64_t));

if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) {
if (md->tunnel.metadata.present.map) {
miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
sizeof md->tunnel.metadata /
sizeof(uint64_t));
}
} else {
if (md->tunnel.metadata.present.len) {
miniflow_push_words(mf, tunnel.metadata.present,
&md->tunnel.metadata.present, 1);
miniflow_push_words(mf, tunnel.metadata.opts.gnv,
md->tunnel.metadata.opts.gnv,
DIV_ROUND_UP(md->tunnel.metadata.present.len,
sizeof(uint64_t)));
}
}
}
if (md->skb_priority || md->pkt_mark) {
Expand Down Expand Up @@ -815,7 +828,7 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
if (flow->tunnel.gbp_flags) {
match_set_tun_gbp_flags(flow_metadata, flow->tunnel.gbp_flags);
}
tun_metadata_get_fmd(&flow->tunnel.metadata, flow_metadata);
tun_metadata_get_fmd(&flow->tunnel, flow_metadata);
if (flow->metadata != htonll(0)) {
match_set_metadata(flow_metadata, flow->metadata);
}
Expand Down Expand Up @@ -1161,9 +1174,16 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc,
WC_MASK_FIELD(wc, tunnel.gbp_id);
WC_MASK_FIELD(wc, tunnel.gbp_flags);

if (flow->tunnel.metadata.opt_map) {
wc->masks.tunnel.metadata.opt_map = flow->tunnel.metadata.opt_map;
WC_MASK_FIELD(wc, tunnel.metadata.opts);
if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
if (flow->tunnel.metadata.present.map) {
wc->masks.tunnel.metadata.present.map =
flow->tunnel.metadata.present.map;
WC_MASK_FIELD(wc, tunnel.metadata.opts.u8);
}
} else {
WC_MASK_FIELD(wc, tunnel.metadata.present.len);
memset(wc->masks.tunnel.metadata.opts.gnv, 0xff,
flow->tunnel.metadata.present.len);
}
} else if (flow->tunnel.tun_id) {
WC_MASK_FIELD(wc, tunnel.tun_id);
Expand Down Expand Up @@ -1253,9 +1273,17 @@ flow_wc_map(const struct flow *flow, struct miniflow *map)

map->tnl_map = 0;
if (flow->tunnel.ip_dst) {
map->tnl_map = MINIFLOW_TNL_MAP(tunnel);
if (!flow->tunnel.metadata.opt_map) {
map->tnl_map &= ~MINIFLOW_TNL_MAP(tunnel.metadata);
map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel,
offsetof(struct flow_tnl, metadata));
if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
if (flow->tunnel.metadata.present.map) {
map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel.metadata,
sizeof(flow->tunnel.metadata));
}
} else {
map->tnl_map |= MINIFLOW_TNL_MAP(tunnel.metadata.present.len);
map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel.metadata.opts.gnv,
flow->tunnel.metadata.present.len);
}
}

Expand Down
13 changes: 11 additions & 2 deletions lib/flow.h
Expand Up @@ -80,6 +80,12 @@ BUILD_ASSERT_DECL(FLOW_TNL_F_OAM == NX_TUN_FLAG_OAM);

#define FLOW_TNL_F_MASK ((1 << 4) - 1)

/* Purely internal to OVS userspace. These flags should never be exposed to
* the outside world and so aren't included in the flags mask. */

/* Tunnel information is in userspace datapath format. */
#define FLOW_TNL_F_UDPIF (1 << 4)

const char *flow_tun_flag_to_string(uint32_t flags);

/* Maximum number of supported MPLS labels. */
Expand Down Expand Up @@ -518,9 +524,12 @@ flow_values_get_next_in_maps(struct flow_for_each_in_maps_aux *aux,
#define FLOW_U64_SIZE(FIELD) \
DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint64_t))

#define MINIFLOW_TNL_MAP(FIELD) \
(((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1) \
#define MINIFLOW_TNL_MAP__(FIELD, LEN) \
(((UINT64_C(1) << DIV_ROUND_UP(LEN, sizeof(uint64_t))) - 1) \
<< (offsetof(struct flow, FIELD) / sizeof(uint64_t)))

#define MINIFLOW_TNL_MAP(FIELD) \
MINIFLOW_TNL_MAP__(FIELD, sizeof(((struct flow *)0)->FIELD))
#define MINIFLOW_PKT_MAP(FIELD) \
(((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1) \
<< ((offsetof(struct flow, FIELD) / sizeof(uint64_t)) - FLOW_TNL_U64S))
Expand Down
63 changes: 63 additions & 0 deletions lib/geneve.h
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GENEVE_H
#define GENEVE_H 1

#include "openvswitch/types.h"

/* Size limit, in bytes, for a single option.  124 == 31 * 4, i.e. the largest
 * value a 5-bit 'length' (see struct geneve_opt) could describe if it counts
 * 4-byte units -- presumably that is the derivation; TODO confirm. */
#define GENEVE_MAX_OPT_SIZE 124
/* Size limit, in bytes, for all options in one header.  252 == 63 * 4: the
 * 6-bit 'opt_len' in struct genevehdr is multiplied by 4 to obtain a byte
 * count, so this is the largest options area that field can express. */
#define GENEVE_TOT_OPT_SIZE 252

/* Bit 7 of a one-byte value.  Judging by the name it is meant to be tested
 * against an option's 'type' to detect critical options -- TODO confirm. */
#define GENEVE_CRIT_OPT_TYPE (1 << 7)

/* Fixed four-byte header of one Geneve option (a TLV).  The option's data is
 * not a member of this struct.
 *
 * The last byte is split into bit-fields that are declared in opposite order
 * depending on WORDS_BIGENDIAN, presumably so that the in-memory layout
 * matches the on-the-wire layout on either kind of host -- bit-field
 * allocation order is compiler-defined, so confirm for new toolchains. */
struct geneve_opt {
/* Option class, stored big-endian (ovs_be16). */
ovs_be16 opt_class;
/* Option type.  NOTE(review): GENEVE_CRIT_OPT_TYPE (bit 7) looks like it is
 * meant to be tested against this byte -- confirm. */
uint8_t type;
/* 'r1'..'r3' are three single-bit fields (named like reserved bits -- TODO
 * confirm); 'length' is a 5-bit field, presumably the option data length in
 * 4-byte units (31 * 4 == GENEVE_MAX_OPT_SIZE) -- TODO confirm. */
#ifdef WORDS_BIGENDIAN
uint8_t r1:1;
uint8_t r2:1;
uint8_t r3:1;
uint8_t length:5;
#else
uint8_t length:5;
uint8_t r3:1;
uint8_t r2:1;
uint8_t r1:1;
#endif
/* Option data follows this header; it is not declared as a member. */
};

/* Geneve base header followed by a variable-length options area.
 *
 * The first two bytes are bit-fields declared in opposite order depending on
 * WORDS_BIGENDIAN, presumably so that the in-memory layout matches the wire
 * layout on either kind of host -- bit-field allocation order is
 * compiler-defined, so confirm for new toolchains. */
struct genevehdr {
/* First byte: 'ver' is a 2-bit field (by its name, the protocol version) and
 * 'opt_len' a 6-bit field.  The Geneve receive path multiplies 'opt_len' by
 * 4 to get the options length in bytes, so it counts 4-byte units.
 *
 * Second byte: 'oam' and 'critical' are single-bit flags and 'rsvd1' is a
 * 6-bit field.  NOTE(review): by their names, 'oam' marks an OAM packet,
 * 'critical' signals that a critical option is present, and 'rsvd1' is
 * reserved -- confirm against the Geneve specification. */
#ifdef WORDS_BIGENDIAN
uint8_t ver:2;
uint8_t opt_len:6;
uint8_t oam:1;
uint8_t critical:1;
uint8_t rsvd1:6;
#else
uint8_t opt_len:6;
uint8_t ver:2;
uint8_t rsvd1:6;
uint8_t critical:1;
uint8_t oam:1;
#endif
/* Big-endian 16-bit value; presumably the protocol type of the encapsulated
 * payload -- TODO confirm. */
ovs_be16 proto_type;
/* Big-endian 32-bit value that is only guaranteed 16-bit alignment.  The
 * receive path converts it to host order and shifts right by 8 to obtain the
 * tunnel ID, i.e. the identifier is in the upper 24 bits. */
ovs_16aligned_be32 vni;
/* Flexible array member: the options, 'opt_len' * 4 bytes in total (that is
 * the byte count the receive path copies from here). */
struct geneve_opt options[];
};

#endif /* geneve.h */
6 changes: 3 additions & 3 deletions lib/meta-flow.c
Expand Up @@ -196,7 +196,7 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc)
CASE_MFF_TUN_METADATA: {
union mf_value value;

tun_metadata_read(&wc->masks.tunnel.metadata, mf, &value);
tun_metadata_read(&wc->masks.tunnel, mf, &value);
return is_all_zeros(&value.tun_metadata, mf->n_bytes);
}
case MFF_METADATA:
Expand Down Expand Up @@ -616,7 +616,7 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow,
value->u8 = flow->tunnel.ip_tos;
break;
CASE_MFF_TUN_METADATA:
tun_metadata_read(&flow->tunnel.metadata, mf, value);
tun_metadata_read(&flow->tunnel, mf, value);
break;

case MFF_METADATA:
Expand Down Expand Up @@ -1119,7 +1119,7 @@ mf_set_flow_value(const struct mf_field *mf,
flow->tunnel.ip_ttl = value->u8;
break;
CASE_MFF_TUN_METADATA:
tun_metadata_write(&flow->tunnel.metadata, mf, value);
tun_metadata_write(&flow->tunnel, mf, value);
break;
case MFF_METADATA:
flow->metadata = value->be64;
Expand Down
26 changes: 11 additions & 15 deletions lib/netdev-vport.c
Expand Up @@ -1054,11 +1054,10 @@ parse_gre_header(struct dp_packet *packet,
static void
pkt_metadata_init_tnl(struct pkt_metadata *md)
{
memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata));

/* If 'opt_map' is zero then none of the rest of the tunnel metadata
* will be read, so we can skip clearing it. */
md->tunnel.metadata.opt_map = 0;
/* Zero up through the tunnel metadata options. The length and table
* are before this and as long as they are empty, the options won't
* be looked at. */
memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata.opts));
}

static int
Expand Down Expand Up @@ -1208,8 +1207,7 @@ netdev_geneve_pop_header(struct dp_packet *packet)
struct pkt_metadata *md = &packet->md;
struct flow_tnl *tnl = &md->tunnel;
struct genevehdr *gnh;
unsigned int hlen;
int err;
unsigned int hlen, opts_len;

pkt_metadata_init_tnl(md);
if (GENEVE_BASE_HLEN > dp_packet_size(packet)) {
Expand All @@ -1223,7 +1221,8 @@ netdev_geneve_pop_header(struct dp_packet *packet)
return EINVAL;
}

hlen = GENEVE_BASE_HLEN + gnh->opt_len * 4;
opts_len = gnh->opt_len * 4;
hlen = GENEVE_BASE_HLEN + opts_len;
if (hlen > dp_packet_size(packet)) {
VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet size=%u\n",
hlen, dp_packet_size(packet));
Expand All @@ -1245,12 +1244,9 @@ netdev_geneve_pop_header(struct dp_packet *packet)
tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
tnl->flags |= FLOW_TNL_F_KEY;

err = tun_metadata_from_geneve_header(gnh->options, gnh->opt_len * 4,
&tnl->metadata);
if (err) {
VLOG_WARN_RL(&err_rl, "invalid geneve options");
return err;
}
memcpy(tnl->metadata.opts.gnv, gnh->options, opts_len);
tnl->metadata.present.len = opts_len;
tnl->flags |= FLOW_TNL_F_UDPIF;

dp_packet_reset_packet(packet, hlen);

Expand Down Expand Up @@ -1278,7 +1274,7 @@ netdev_geneve_build_header(const struct netdev *netdev,

ovs_mutex_unlock(&dev->mutex);

opt_len = tun_metadata_to_geneve_header(&tnl_flow->tunnel.metadata,
opt_len = tun_metadata_to_geneve_header(&tnl_flow->tunnel,
gnh->options, &crit_opt);

gnh->opt_len = opt_len / 4;
Expand Down
2 changes: 1 addition & 1 deletion lib/odp-execute.c
Expand Up @@ -151,7 +151,7 @@ odp_set_tunnel_action(const struct nlattr *a, struct flow_tnl *tun_key)
{
enum odp_key_fitness fitness;

fitness = odp_tun_key_from_attr(a, tun_key);
fitness = odp_tun_key_from_attr(a, true, tun_key);
ovs_assert(fitness != ODP_FIT_ERROR);
}

Expand Down

0 comments on commit 18162da

Please sign in to comment.