Skip to content

Commit

Permalink
ofproto-dpif-rid: Make lookups cheaper.
Browse files Browse the repository at this point in the history
This patch removes a large-ish copy from the recirculation context
lookup, which is performed for each recirculated upcall and
revalidation of a recirculating flow.

Tunnel metadata has grown large since the addition of Geneve options,
and copying that metadata for performing a lookup is not necessary.
Change recirc_metadata to use a pointer to struct flow_tnl, and only
copy the tunnel metadata when needed, and only copy as little of it as
possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
  • Loading branch information
Jarno Rajahalme committed Aug 26, 2015
1 parent a14502a commit 5978195
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 57 deletions.
22 changes: 1 addition & 21 deletions lib/flow.h
Expand Up @@ -65,28 +65,8 @@ BUILD_ASSERT_DECL(FLOW_N_REGS % 2 == 0); /* Even. */
BUILD_ASSERT_DECL(FLOW_NW_FRAG_ANY == NX_IP_FRAG_ANY);
BUILD_ASSERT_DECL(FLOW_NW_FRAG_LATER == NX_IP_FRAG_LATER);

/* Some flags are exposed through OpenFlow while others are used only
* internally. */

/* Public flags */
#define FLOW_TNL_F_OAM (1 << 0)

#define FLOW_TNL_PUB_F_MASK ((1 << 1) - 1)
BUILD_ASSERT_DECL(FLOW_TNL_F_OAM == NX_TUN_FLAG_OAM);

/* Private flags */
#define FLOW_TNL_F_DONT_FRAGMENT (1 << 1)
#define FLOW_TNL_F_CSUM (1 << 2)
#define FLOW_TNL_F_KEY (1 << 3)

#define FLOW_TNL_F_MASK ((1 << 4) - 1)

/* Purely internal to OVS userspace. These flags should never be exposed to
* the outside world and so aren't included in the flags mask. */

/* Tunnel information is in userspace datapath format. */
#define FLOW_TNL_F_UDPIF (1 << 4)

const char *flow_tun_flag_to_string(uint32_t flags);

/* Maximum number of supported MPLS labels. */
Expand Down Expand Up @@ -988,7 +968,7 @@ pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow)
{
md->recirc_id = flow->recirc_id;
md->dp_hash = flow->dp_hash;
md->tunnel = flow->tunnel;
flow_tnl_copy__(&md->tunnel, &flow->tunnel);
md->skb_priority = flow->skb_priority;
md->pkt_mark = flow->pkt_mark;
md->in_port = flow->in_port;
Expand Down
65 changes: 63 additions & 2 deletions lib/packets.h
Expand Up @@ -35,9 +35,9 @@ struct ds;

/* Tunnel information used in flow key and metadata. */
struct flow_tnl {
ovs_be64 tun_id;
ovs_be32 ip_dst;
ovs_be32 ip_src;
ovs_be64 tun_id;
uint16_t flags;
uint8_t ip_tos;
uint8_t ip_ttl;
Expand All @@ -49,6 +49,65 @@ struct flow_tnl {
struct tun_metadata metadata;
};

/* Some flags are exposed through OpenFlow while others are used only
* internally. */

/* Public flags */
#define FLOW_TNL_F_OAM (1 << 0)

#define FLOW_TNL_PUB_F_MASK ((1 << 1) - 1)

/* Private flags */
#define FLOW_TNL_F_DONT_FRAGMENT (1 << 1)
#define FLOW_TNL_F_CSUM (1 << 2)
#define FLOW_TNL_F_KEY (1 << 3)

#define FLOW_TNL_F_MASK ((1 << 4) - 1)

/* Purely internal to OVS userspace. These flags should never be exposed to
* the outside world and so aren't included in the flags mask. */

/* Tunnel information is in userspace datapath format. */
#define FLOW_TNL_F_UDPIF (1 << 4)

/* Returns an offset to 'src' covering all the meaningful fields in 'src'. */
static inline size_t
flow_tnl_size(const struct flow_tnl *src)
{
if (!src->ip_dst) {
/* Covers ip_dst only. */
return offsetof(struct flow_tnl, ip_src);
}
if (src->flags & FLOW_TNL_F_UDPIF) {
/* Datapath format, cover all options we have. */
return offsetof(struct flow_tnl, metadata.opts)
+ src->metadata.present.len;
}
if (!src->metadata.present.map) {
/* No TLVs, opts is irrelevant. */
return offsetof(struct flow_tnl, metadata.opts);
}
/* Have decoded TLVs, opts is relevant. */
return sizeof *src;
}

/* Copy flow_tnl, but avoid copying unused portions of tun_metadata. Unused
* data in 'dst' is NOT cleared, so this must not be used in cases where the
* uninitialized portion may be hashed over. */
static inline void
flow_tnl_copy__(struct flow_tnl *dst, const struct flow_tnl *src)
{
memcpy(dst, src, flow_tnl_size(src));
}

static inline bool
flow_tnl_equal(const struct flow_tnl *a, const struct flow_tnl *b)
{
size_t a_size = flow_tnl_size(a);

return a_size == flow_tnl_size(b) && !memcmp(a, b, a_size);
}

/* Unfortunately, a "struct flow" sometimes has to handle OpenFlow port
* numbers and other times datapath (dpif) port numbers. This union allows
* access to both. */
Expand All @@ -67,7 +126,9 @@ struct pkt_metadata {
uint32_t skb_priority; /* Packet priority for QoS. */
uint32_t pkt_mark; /* Packet mark. */
union flow_in_port in_port; /* Input port. */
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. Note that
* if 'ip_dst' == 0, the rest of the fields may
* be uninitialized. */
};

static inline void
Expand Down
30 changes: 24 additions & 6 deletions ofproto/ofproto-dpif-rid.c
Expand Up @@ -130,8 +130,16 @@ recirc_metadata_hash(const struct recirc_state *state)

hash = hash_pointer(state->ofproto, 0);
hash = hash_int(state->table_id, hash);
hash = hash_words64((const uint64_t *) &state->metadata,
sizeof state->metadata / sizeof(uint64_t),
if (state->metadata.tunnel->ip_dst) {
/* We may leave remainder bytes unhashed, but that is unlikely as
* the tunnel is not in the datapath format. */
hash = hash_words64((const uint64_t *) state->metadata.tunnel,
flow_tnl_size(state->metadata.tunnel)
/ sizeof(uint64_t), hash);
}
hash = hash_words64((const uint64_t *) &state->metadata.metadata,
(sizeof state->metadata - sizeof state->metadata.tunnel)
/ sizeof(uint64_t),
hash);
if (state->stack && state->stack->size != 0) {
hash = hash_words64((const uint64_t *) state->stack->data,
Expand All @@ -153,7 +161,9 @@ recirc_metadata_equal(const struct recirc_state *a,
{
return (a->table_id == b->table_id
&& a->ofproto == b->ofproto
&& !memcmp(&a->metadata, &b->metadata, sizeof a->metadata)
&& flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel)
&& !memcmp(&a->metadata.metadata, &b->metadata.metadata,
sizeof a->metadata - sizeof a->metadata.tunnel)
&& (((!a->stack || !a->stack->size) &&
(!b->stack || !b->stack->size))
|| (a->stack && b->stack && ofpbuf_equal(a->stack, b->stack)))
Expand Down Expand Up @@ -193,9 +203,13 @@ recirc_ref_equal(const struct recirc_state *target, uint32_t hash)
}

static void
recirc_state_clone(struct recirc_state *new, const struct recirc_state *old)
recirc_state_clone(struct recirc_state *new, const struct recirc_state *old,
struct flow_tnl *tunnel)
{
*new = *old;
flow_tnl_copy__(tunnel, old->metadata.tunnel);
new->metadata.tunnel = tunnel;

if (new->stack) {
new->stack = new->stack->size ? ofpbuf_clone(new->stack) : NULL;
}
Expand All @@ -216,9 +230,11 @@ recirc_alloc_id__(const struct recirc_state *state, uint32_t hash)
ovs_assert(state->action_set_len <= state->ofpacts_len);

struct recirc_id_node *node = xzalloc(sizeof *node);

node->hash = hash;
ovs_refcount_init(&node->refcount);
recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state);
recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state,
&node->state_metadata_tunnel);

ovs_mutex_lock(&mutex);
for (;;) {
Expand Down Expand Up @@ -269,10 +285,12 @@ recirc_alloc_id_ctx(const struct recirc_state *state)
uint32_t
recirc_alloc_id(struct ofproto_dpif *ofproto)
{
struct flow_tnl tunnel;
tunnel.ip_dst = htonl(0);
struct recirc_state state = {
.table_id = TBL_INTERNAL,
.ofproto = ofproto,
.metadata = { .in_port = OFPP_NONE },
.metadata = { .tunnel = &tunnel, .in_port = OFPP_NONE },
};
return recirc_alloc_id__(&state, recirc_metadata_hash(&state))->id;
}
Expand Down
64 changes: 36 additions & 28 deletions ofproto/ofproto-dpif-rid.h
Expand Up @@ -34,40 +34,40 @@ struct rule;
* =============
*
* Recirculation is a technique to allow a frame to re-enter the datapath
* packet processing path for one or multiple times to achieve more flexible
* packet processing, such modifying header fields after MPLS POP action and
* selecting bond a slave port for bond ports.
* packet processing path to achieve more flexible packet processing, such as
* modifying header fields after MPLS POP action and selecting a slave port for
* bond ports.
*
* Data path and user space interface
* -----------------------------------
*
* Recirculation uses two uint32_t fields, recirc_id and dp_hash, and a RECIRC
* action. The value recirc_id is used to select the next packet processing
* steps among multiple instances of recirculation. When a packet initially
* enters the data path it is assigned with recirc_id 0, which indicates no
* recirculation. Recirc_ids are managed by the user space, opaque to the
* data path.
* action. recirc_id is used to select the next packet processing steps among
* multiple instances of recirculation. When a packet initially enters the
* datapath it is assigned with recirc_id 0, which indicates no recirculation.
* Recirc_ids are managed by the user space, opaque to the datapath.
*
* On the other hand, dp_hash can only be computed by the data path, opaque to
* the user space. In fact, user space may not able to recompute the hash
* value. The dp_hash value should be wildcarded for a newly received
* packet. HASH action specifies whether the hash is computed, and if
* computed, how many fields are to be included in the hash computation. The
* computed hash value is stored into the dp_hash field prior to recirculation.
* On the other hand, dp_hash can only be computed by the datapath, opaque to
* the user space, as the datapath is free to choose the hashing algorithm
* without informing user space about it. The dp_hash value should be
* wildcarded for newly received packets. HASH action specifies whether the
* hash is computed, and if computed, how many fields are to be included in the
* hash computation. The computed hash value is stored into the dp_hash field
* prior to recirculation.
*
* The RECIRC action sets the recirc_id field and then reprocesses the packet
* as if it was received on the same input port. RECIRC action works like a
* function call; actions listed behind the RECIRC action will be executed
* after its execution. RECIRC action can be nested, data path implementation
* limits the number of recirculation executed to prevent unreasonable nesting
* depth or infinite loop.
* as if it was received again on the same input port. RECIRC action works
* like a function call; actions listed after the RECIRC action will be
* executed after recirculation. RECIRC action can be nested, but datapath
* implementation limits the number of nested recirculations to prevent
* unreasonable nesting depth or infinite loop.
*
* User space recirculation context
* ---------------------------------
*
* Recirculation is hidden from the OpenFlow controllers. Action translation
* code deduces when recirculation is necessary and issues a data path
* recirculation action. All OpenFlow actions to be performed after
* Recirculation is usually hidden from the OpenFlow controllers. Action
* translation code deduces when recirculation is necessary and issues a
* datapath recirculation action. All OpenFlow actions to be performed after
* recirculation are derived from the OpenFlow pipeline and are stored with the
* recirculation ID. When the OpenFlow tables are changed in a way affecting
* the recirculation flows, new recirculation ID with new metadata and actions
Expand All @@ -76,9 +76,10 @@ struct rule;
* Recirculation ID pool
* ----------------------
*
* Recirculation ID needs to be unique for all data paths. Recirculation ID
* pool keeps track recirculation ids and stores OpenFlow pipeline translation
* context so that flow processing may continue after recirculation.
* Recirculation ID needs to be unique for all datapaths. Recirculation ID
* pool keeps track of recirculation ids and stores OpenFlow pipeline
* translation context so that flow processing may continue after
* recirculation.
*
* A Recirculation ID can be any uint32_t value, except for that the value 0 is
* reserved for 'no recirculation' case.
Expand All @@ -96,7 +97,7 @@ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);

struct recirc_metadata {
/* Metadata in struct flow. */
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
const struct flow_tnl *tunnel; /* Encapsulating tunnel parameters. */
ovs_be64 metadata; /* OpenFlow Metadata. */
uint64_t regs[FLOW_N_XREGS]; /* Registers. */
ofp_port_t in_port; /* Incoming port. */
Expand All @@ -108,7 +109,7 @@ recirc_metadata_from_flow(struct recirc_metadata *md,
const struct flow *flow)
{
memset(md, 0, sizeof *md);
md->tunnel = flow->tunnel;
md->tunnel = &flow->tunnel;
md->metadata = flow->metadata;
memcpy(md->regs, flow->regs, sizeof md->regs);
md->in_port = flow->in_port.ofp_port;
Expand All @@ -119,7 +120,11 @@ static inline void
recirc_metadata_to_flow(const struct recirc_metadata *md,
struct flow *flow)
{
flow->tunnel = md->tunnel;
if (md->tunnel && md->tunnel->ip_dst) {
flow->tunnel = *md->tunnel;
} else {
memset(&flow->tunnel, 0, sizeof flow->tunnel);
}
flow->metadata = md->metadata;
memcpy(flow->regs, md->regs, sizeof flow->regs);
flow->in_port.ofp_port = md->in_port;
Expand Down Expand Up @@ -161,6 +166,9 @@ struct recirc_id_node {
* This state should not be modified after inserting a node in the pool,
* hence the 'const' to emphasize that. */
const struct recirc_state state;

/* Storage for tunnel metadata. */
struct flow_tnl state_metadata_tunnel;
};

void recirc_init(void);
Expand Down

0 comments on commit 5978195

Please sign in to comment.