dpif-netdev: retrieve flow directly from the flow mark
So that we can skip some very costly CPU operations, including but
not limited to miniflow_extract, EMC lookup, and dpcls lookup. Thus,
performance can be greatly improved.

A PHY-PHY forwarding test with 1000 megaflows (udp,tp_src=1000-1999) and
1 million streams (tp_src=1000-1999, tp_dst=2000-2999) shows more than a
260% performance boost.

Note that even though the heavy miniflow_extract is skipped, we still
have to do some per-packet parsing, because we have to check the tcp_flags.
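
In essence, the receive path now tries the mark before anything else.
A condensed sketch of the per-packet logic this commit adds to
emc_processing() (all names are taken from the diff below):

    uint32_t mark;

    if (*recirc_depth_get() == 0 && dp_packet_has_flow_mark(packet, &mark)) {
        flow = mark_to_flow_find(pmd, mark);
        if (flow) {
            /* The NIC already classified this packet: skip
             * miniflow_extract() and the EMC/dpcls lookups.  Only the
             * TCP flags still need parsing, for flow statistics. */
            tcp_flags = parse_tcp_flags(packet);
            dp_netdev_queue_batches(packet, flow, tcp_flags,
                                    batches, n_batches);
            continue;
        }
    }
    /* Otherwise fall through to the usual miniflow_extract() path. */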

Co-authored-by: Finn Christensen <fc@napatech.com>
Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
Signed-off-by: Finn Christensen <fc@napatech.com>
Co-authored-by: Shahaf Shuler <shahafs@mellanox.com>
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Signed-off-by: Ian Stokes <ian.stokes@intel.com>
3 people authored and istokes committed Jul 6, 2018
1 parent 62b0859 commit aab96ec
Showing 4 changed files with 121 additions and 6 deletions.
13 changes: 13 additions & 0 deletions lib/dp-packet.h
@@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
#define reset_dp_packet_checksum_ol_flags(arg)
#endif

static inline bool
dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
uint32_t *mark OVS_UNUSED)
{
#ifdef DPDK_NETDEV
if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
*mark = p->mbuf.hash.fdir.hi;
return true;
}
#endif
return false;
}

enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */

struct dp_packet_batch {
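
For context, dp_packet_has_flow_mark() only reads back a mark that the NIC
attached on receive. Installing such a mark is not part of this commit; it is
done elsewhere through DPDK's rte_flow API with a MARK action. A minimal
sketch, where the port id, the UDP match, and the mark value 0x1234 are
illustrative assumptions:

    #include <rte_flow.h>

    /* Tag all ingress UDP-over-IPv4 packets with mark 0x1234.  Matched
     * packets then carry PKT_RX_FDIR_ID in ol_flags, with the mark in
     * mbuf.hash.fdir.hi, which dp_packet_has_flow_mark() reads above. */
    static struct rte_flow *
    install_mark_rule(uint16_t port_id)
    {
        struct rte_flow_attr attr = { .ingress = 1 };
        struct rte_flow_item pattern[] = {
            { .type = RTE_FLOW_ITEM_TYPE_ETH },
            { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
            { .type = RTE_FLOW_ITEM_TYPE_UDP },
            { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_mark mark = { .id = 0x1234 };
        struct rte_flow_action_queue queue = { .index = 0 };
        struct rte_flow_action actions[] = {
            { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
            { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
            { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;

        return rte_flow_create(port_id, &attr, pattern, actions, &error);
    }
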
46 changes: 40 additions & 6 deletions lib/dpif-netdev.c
@@ -2119,6 +2119,23 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
}
}

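/* Maps a flow mark reported by the NIC back to the datapath flow that
 * installed it.  A mark is global, but each flow lives on one PMD thread,
 * so the PMD core id must match as well; dead flows are skipped. */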
static struct dp_netdev_flow *
mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd,
const uint32_t mark)
{
struct dp_netdev_flow *flow;

CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0),
&flow_mark.mark_to_flow) {
if (flow->mark == mark && flow->pmd_id == pmd->core_id &&
flow->dead == false) {
return flow;
}
}

return NULL;
}

static void
dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev_flow *flow)
@@ -5366,10 +5383,10 @@ struct packet_batch_per_flow {
static inline void
packet_batch_per_flow_update(struct packet_batch_per_flow *batch,
struct dp_packet *packet,
const struct miniflow *mf)
uint16_t tcp_flags)
{
batch->byte_count += dp_packet_size(packet);
batch->tcp_flags |= miniflow_get_tcp_flags(mf);
batch->tcp_flags |= tcp_flags;
batch->array.packets[batch->array.count++] = packet;
}

@@ -5403,7 +5420,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,

static inline void
dp_netdev_queue_batches(struct dp_packet *pkt,
struct dp_netdev_flow *flow, const struct miniflow *mf,
struct dp_netdev_flow *flow, uint16_t tcp_flags,
struct packet_batch_per_flow *batches,
size_t *n_batches)
{
@@ -5414,7 +5431,7 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
packet_batch_per_flow_init(batch, flow);
}

packet_batch_per_flow_update(batch, pkt, mf);
packet_batch_per_flow_update(batch, pkt, tcp_flags);
}

/* Try to process all ('cnt') the 'packets' using only the exact match cache
@@ -5445,6 +5462,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
const size_t cnt = dp_packet_batch_size(packets_);
uint32_t cur_min;
int i;
uint16_t tcp_flags;

atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
pmd_perf_update_counter(&pmd->perf_stats,
@@ -5453,6 +5471,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,

DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) {
struct dp_netdev_flow *flow;
uint32_t mark;

if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
dp_packet_delete(packet);
@@ -5470,6 +5489,18 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
if (!md_is_valid) {
pkt_metadata_init(&packet->md, port_no);
}

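/* Fast path: the NIC attached a flow mark, so the flow can be found
 * without miniflow_extract() or any EMC/dpcls lookup.  Restricted to
 * recirculation depth 0, i.e. packets fresh off the wire. */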
if ((*recirc_depth_get() == 0) &&
dp_packet_has_flow_mark(packet, &mark)) {
flow = mark_to_flow_find(pmd, mark);
if (flow) {
tcp_flags = parse_tcp_flags(packet);
dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
n_batches);
continue;
}
}

miniflow_extract(packet, &key->mf);
key->len = 0; /* Not computed yet. */
/* If EMC is disabled skip hash computation and emc_lookup */
@@ -5485,7 +5516,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
flow = NULL;
}
if (OVS_LIKELY(flow)) {
dp_netdev_queue_batches(packet, flow, &key->mf, batches,
tcp_flags = miniflow_get_tcp_flags(&key->mf);
dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
n_batches);
} else {
/* Exact match cache missed. Group missed packets together at
@@ -5672,7 +5704,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
flow = dp_netdev_flow_cast(rules[i]);

emc_probabilistic_insert(pmd, &keys[i], flow);
dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
dp_netdev_queue_batches(packet, flow,
miniflow_get_tcp_flags(&keys[i].mf),
batches, n_batches);
}

pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
67 changes: 67 additions & 0 deletions lib/flow.c
@@ -1019,6 +1019,73 @@ parse_dl_type(const struct eth_header *data_, size_t size)
return parse_ethertype(&data, &size);
}

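/* Parses just enough of 'packet' (Ethernet, then IPv4 or IPv6 including
 * extension headers) to return its TCP flags, or 0 if they cannot be
 * determined (non-TCP, a later fragment, or a malformed packet).  Sets
 * the packet's layer offsets as a side effect. */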
uint16_t
parse_tcp_flags(struct dp_packet *packet)
{
const void *data = dp_packet_data(packet);
const char *frame = (const char *)data;
size_t size = dp_packet_size(packet);
ovs_be16 dl_type;
uint8_t nw_frag = 0, nw_proto = 0;

if (packet->packet_type != htonl(PT_ETH)) {
return 0;
}

dp_packet_reset_offsets(packet);

data_pull(&data, &size, ETH_ADDR_LEN * 2);
dl_type = parse_ethertype(&data, &size);
if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
packet->l2_5_ofs = (char *)data - frame;
}
if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
const struct ip_header *nh = data;
int ip_len;
uint16_t tot_len;

if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
return 0;
}
dp_packet_set_l2_pad_size(packet, size - tot_len);
packet->l3_ofs = (uint16_t)((char *)nh - frame);
nw_proto = nh->ip_proto;
nw_frag = ipv4_get_nw_frag(nh);

size = tot_len; /* Never pull padding. */
data_pull(&data, &size, ip_len);
} else if (dl_type == htons(ETH_TYPE_IPV6)) {
const struct ovs_16aligned_ip6_hdr *nh = data;
uint16_t plen;

if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
return 0;
}
packet->l3_ofs = (uint16_t)((char *)nh - frame);
data_pull(&data, &size, sizeof *nh);

plen = ntohs(nh->ip6_plen); /* Never pull padding. */
dp_packet_set_l2_pad_size(packet, size - plen);
size = plen;
nw_proto = nh->ip6_nxt;
if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {
return 0;
}
} else {
return 0;
}

packet->l4_ofs = (uint16_t)((char *)data - frame);
if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP &&
size >= TCP_HEADER_LEN) {
const struct tcp_header *tcp = data;

return TCP_FLAGS(tcp->tcp_ctl);
}

return 0;
}

/* For every bit of a field that is wildcarded in 'wildcards', sets the
* corresponding bit in 'flow' to zero. */
void
1 change: 1 addition & 0 deletions lib/flow.h
@@ -133,6 +133,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
uint8_t *nw_frag);
ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size);
bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key);
uint16_t parse_tcp_flags(struct dp_packet *packet);

static inline uint64_t
flow_get_xreg(const struct flow *flow, int idx)
