Skip to content

Commit

Permalink
ct-dpif,dpif-netlink: Support conntrack flush by ct 5-tuple
Browse files Browse the repository at this point in the history
This patch adds support for flushing a conntrack entry specified by the
conntrack 5-tuple, and provides the implementation in dpif-netlink.
The implementation of dpif-netlink in the Linux datapath utilizes the
NFNL_SUBSYS_CTNETLINK netlink subsystem to delete a conntrack entry in
nf_conntrack.  Future patches will add support for the userspace and
Windows datapaths.

VMWare-BZ: #1983178
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Justin Pettit <jpettit@ovn.org>
  • Loading branch information
YiHungWei authored and justinpettit committed Dec 7, 2017
1 parent 0bd28b0 commit 817a765
Show file tree
Hide file tree
Showing 10 changed files with 144 additions and 16 deletions.
24 changes: 18 additions & 6 deletions lib/ct-dpif.c
Expand Up @@ -110,20 +110,32 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump)
: EOPNOTSUPP);
}

/* Flush the entries in the connection tracker used by 'dpif'.
/* Flush the entries in the connection tracker used by 'dpif'. The
* arguments have the following behavior:
*
* If 'zone' is not NULL, flush only the entries in '*zone'. */
* - If both 'zone' and 'tuple' are NULL, flush all the conntrack entries.
* - If 'zone' is not NULL, and 'tuple' is NULL, flush all the conntrack
* entries in '*zone'.
* - If 'tuple' is not NULL, flush the conntrack entry specified by 'tuple'
* in '*zone'. If 'zone' is NULL, use the default zone (zone 0). */
int
ct_dpif_flush(struct dpif *dpif, const uint16_t *zone)
ct_dpif_flush(struct dpif *dpif, const uint16_t *zone,
const struct ct_dpif_tuple *tuple)
{
if (zone) {
VLOG_DBG("%s: ct_flush: %"PRIu16, dpif_name(dpif), *zone);
if (tuple) {
struct ds ds = DS_EMPTY_INITIALIZER;
ct_dpif_format_tuple(&ds, tuple);
VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr(&ds),
zone ? *zone : 0);
ds_destroy(&ds);
} else if (zone) {
VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone);
} else {
VLOG_DBG("%s: ct_flush: <all>", dpif_name(dpif));
}

return (dpif->dpif_class->ct_flush
? dpif->dpif_class->ct_flush(dpif, zone)
? dpif->dpif_class->ct_flush(dpif, zone, tuple)
: EOPNOTSUPP);
}

Expand Down
3 changes: 2 additions & 1 deletion lib/ct-dpif.h
Expand Up @@ -195,7 +195,8 @@ int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **,
const uint16_t *zone, int *);
int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *);
int ct_dpif_dump_done(struct ct_dpif_dump_state *);
int ct_dpif_flush(struct dpif *, const uint16_t *zone);
int ct_dpif_flush(struct dpif *, const uint16_t *zone,
const struct ct_dpif_tuple *);
void ct_dpif_entry_uninit(struct ct_dpif_entry *);
void ct_dpif_format_entry(const struct ct_dpif_entry *, struct ds *,
bool verbose, bool print_stats);
Expand Down
2 changes: 1 addition & 1 deletion lib/dpctl.c
Expand Up @@ -1353,7 +1353,7 @@ dpctl_flush_conntrack(int argc, const char *argv[],
return error;
}

error = ct_dpif_flush(dpif, pzone);
error = ct_dpif_flush(dpif, pzone, NULL);

dpif_close(dpif);
return error;
Expand Down
6 changes: 5 additions & 1 deletion lib/dpif-netdev.c
Expand Up @@ -5734,10 +5734,14 @@ dpif_netdev_ct_dump_done(struct dpif *dpif OVS_UNUSED,
}

static int
dpif_netdev_ct_flush(struct dpif *dpif, const uint16_t *zone)
dpif_netdev_ct_flush(struct dpif *dpif, const uint16_t *zone,
const struct ct_dpif_tuple *tuple)
{
struct dp_netdev *dp = get_dp_netdev(dpif);

if (tuple) {
return EOPNOTSUPP;
}
return conntrack_flush(&dp->conntrack, zone);
}

Expand Down
7 changes: 5 additions & 2 deletions lib/dpif-netlink.c
Expand Up @@ -2892,9 +2892,12 @@ dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
}

static int
dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone)
dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
const struct ct_dpif_tuple *tuple)
{
if (zone) {
if (tuple) {
return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
} else if (zone) {
return nl_ct_flush_zone(*zone);
} else {
return nl_ct_flush();
Expand Down
16 changes: 13 additions & 3 deletions lib/dpif-provider.h
Expand Up @@ -75,6 +75,7 @@ dpif_flow_dump_thread_init(struct dpif_flow_dump_thread *thread,

struct ct_dpif_dump_state;
struct ct_dpif_entry;
struct ct_dpif_tuple;

/* Datapath interface class structure, to be defined by each implementation of
* a datapath interface.
Expand Down Expand Up @@ -424,9 +425,18 @@ struct dpif_class {
struct ct_dpif_entry *entry);
int (*ct_dump_done)(struct dpif *, struct ct_dpif_dump_state *state);

/* Flushes the connection tracking tables. If 'zone' is not NULL,
* only deletes connections in '*zone'. */
int (*ct_flush)(struct dpif *, const uint16_t *zone);
/* Flushes the connection tracking tables. The arguments have the
* following behavior:
*
* - If both 'zone' and 'tuple' are NULL, flush all the conntrack
* entries.
* - If 'zone' is not NULL, and 'tuple' is NULL, flush all the
* conntrack entries in '*zone'.
* - If 'tuple' is not NULL, flush the conntrack entry specified by
* 'tuple' in '*zone'. If 'zone' is NULL, use the default zone
* (zone 0). */
int (*ct_flush)(struct dpif *, const uint16_t *zone,
const struct ct_dpif_tuple *tuple);

/* Meters */

Expand Down
97 changes: 97 additions & 0 deletions lib/netlink-conntrack.c
Expand Up @@ -18,6 +18,7 @@

#include "netlink-conntrack.h"

#include <errno.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/netfilter/nf_conntrack_common.h>
Expand Down Expand Up @@ -111,6 +112,8 @@ static bool nl_ct_parse_header_policy(struct ofpbuf *buf,
static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
uint8_t nfgen_family);
static bool nl_ct_put_ct_tuple(struct ofpbuf *buf,
const struct ct_dpif_tuple *tuple, enum ctattr_type type);

struct nl_ct_dump_state {
struct nl_dump dump;
Expand Down Expand Up @@ -239,6 +242,27 @@ nl_ct_flush(void)
return err;
}

int
nl_ct_flush_tuple(const struct ct_dpif_tuple *tuple, uint16_t zone)
{
int err;
struct ofpbuf buf;

ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
nl_msg_put_nfgenmsg(&buf, 0, tuple->l3_type, NFNL_SUBSYS_CTNETLINK,
IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);

nl_msg_put_be16(&buf, CTA_ZONE, htons(zone));
if (!nl_ct_put_ct_tuple(&buf, tuple, CTA_TUPLE_ORIG)) {
err = EOPNOTSUPP;
goto out;
}
err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
out:
ofpbuf_uninit(&buf);
return err;
}

#ifdef _WIN32
int
nl_ct_flush_zone(uint16_t flush_zone)
Expand Down Expand Up @@ -517,6 +541,79 @@ nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple,
return parsed;
}

/* Appends to 'buf' a nested CTA_TUPLE_IP attribute that carries the source
 * and destination addresses of 'tuple'.
 *
 * Returns true on success.  If 'tuple->l3_type' is neither AF_INET nor
 * AF_INET6, logs a rate-limited warning and returns false; in that case the
 * nested attribute already started in 'buf' is left unterminated. */
static bool
nl_ct_put_tuple_ip(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
{
    size_t nest_start = nl_msg_start_nested(buf, CTA_TUPLE_IP);

    switch (tuple->l3_type) {
    case AF_INET:
        nl_msg_put_be32(buf, CTA_IP_V4_SRC, tuple->src.ip);
        nl_msg_put_be32(buf, CTA_IP_V4_DST, tuple->dst.ip);
        break;

    case AF_INET6:
        nl_msg_put_in6_addr(buf, CTA_IP_V6_SRC, &tuple->src.in6);
        nl_msg_put_in6_addr(buf, CTA_IP_V6_DST, &tuple->dst.in6);
        break;

    default:
        VLOG_WARN_RL(&rl, "Unsupported IP protocol: %"PRIu16".",
                     tuple->l3_type);
        return false;
    }

    nl_msg_end_nested(buf, nest_start);
    return true;
}

/* Appends to 'buf' a nested CTA_TUPLE_PROTO attribute describing the L4 part
 * of 'tuple': always the protocol number, then either the ICMP/ICMPv6
 * id/type/code or the TCP/UDP source and destination ports.
 *
 * Returns true on success.  For any other protocol (or an ICMP variant that
 * does not match 'tuple->l3_type'), logs a rate-limited warning and returns
 * false; the nested attribute already started in 'buf' is then left
 * unterminated. */
static bool
nl_ct_put_tuple_proto(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
{
    const bool is_icmp4 = (tuple->l3_type == AF_INET
                           && tuple->ip_proto == IPPROTO_ICMP);
    const bool is_icmp6 = (tuple->l3_type == AF_INET6
                           && tuple->ip_proto == IPPROTO_ICMPV6);
    const bool has_ports = (tuple->ip_proto == IPPROTO_TCP
                            || tuple->ip_proto == IPPROTO_UDP);

    size_t nest_start = nl_msg_start_nested(buf, CTA_TUPLE_PROTO);

    nl_msg_put_u8(buf, CTA_PROTO_NUM, tuple->ip_proto);

    if (is_icmp4) {
        nl_msg_put_be16(buf, CTA_PROTO_ICMP_ID, tuple->icmp_id);
        nl_msg_put_u8(buf, CTA_PROTO_ICMP_TYPE, tuple->icmp_type);
        nl_msg_put_u8(buf, CTA_PROTO_ICMP_CODE, tuple->icmp_code);
    } else if (is_icmp6) {
        nl_msg_put_be16(buf, CTA_PROTO_ICMPV6_ID, tuple->icmp_id);
        nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_TYPE, tuple->icmp_type);
        nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_CODE, tuple->icmp_code);
    } else if (has_ports) {
        nl_msg_put_be16(buf, CTA_PROTO_SRC_PORT, tuple->src_port);
        nl_msg_put_be16(buf, CTA_PROTO_DST_PORT, tuple->dst_port);
    } else {
        VLOG_WARN_RL(&rl, "Unsupported L4 protocol: %"PRIu8".",
                     tuple->ip_proto);
        return false;
    }

    nl_msg_end_nested(buf, nest_start);
    return true;
}

/* Appends to 'buf' a nested attribute of the given 'type' that contains the
 * IP and protocol parts of 'tuple'.
 *
 * 'type' must be CTA_TUPLE_ORIG, CTA_TUPLE_REPLY or CTA_TUPLE_MASTER; any
 * other value is rejected before 'buf' is touched.
 *
 * Returns true if the whole tuple was encoded.  Returns false if 'type' is
 * not accepted or if either part of 'tuple' could not be encoded; in the
 * latter case the nested attribute started here is left unterminated. */
static bool
nl_ct_put_ct_tuple(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple,
                   enum ctattr_type type)
{
    bool type_ok = (type == CTA_TUPLE_ORIG
                    || type == CTA_TUPLE_REPLY
                    || type == CTA_TUPLE_MASTER);
    if (!type_ok) {
        return false;
    }

    size_t nest_start = nl_msg_start_nested(buf, type);

    /* The protocol part is only attempted once the IP part succeeded. */
    if (nl_ct_put_tuple_ip(buf, tuple)
        && nl_ct_put_tuple_proto(buf, tuple)) {
        nl_msg_end_nested(buf, nest_start);
        return true;
    }

    return false;
}

/* Translate netlink TCP state to CT_DPIF_TCP state. */
static uint8_t
nl_ct_tcp_state_to_dpif(uint8_t state)
Expand Down
1 change: 1 addition & 0 deletions lib/netlink-conntrack.h
Expand Up @@ -42,6 +42,7 @@ int nl_ct_dump_done(struct nl_ct_dump_state *);

int nl_ct_flush(void);
int nl_ct_flush_zone(uint16_t zone);
int nl_ct_flush_tuple(const struct ct_dpif_tuple *, uint16_t zone);

bool nl_ct_parse_entry(struct ofpbuf *, struct ct_dpif_entry *,
enum nl_ct_event_type *);
Expand Down
2 changes: 1 addition & 1 deletion ofproto/ofproto-dpif.c
Expand Up @@ -4850,7 +4850,7 @@ ct_flush(const struct ofproto *ofproto_, const uint16_t *zone)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

ct_dpif_flush(ofproto->backer->dpif, zone);
ct_dpif_flush(ofproto->backer->dpif, zone, NULL);
}

static bool
Expand Down
2 changes: 1 addition & 1 deletion tests/ovs-ofctl.at
Expand Up @@ -3167,7 +3167,7 @@ AT_CHECK([ovs-appctl vlog/set ct_dpif:dbg])
AT_CHECK([ovs-ofctl ct-flush-zone br0 123])

OVS_WAIT_UNTIL([grep -q "|ct_dpif|DBG|.*ct_flush:" ovs-vswitchd.log])
AT_CHECK([grep -q "ct_dpif|DBG|.*ct_flush: 123" ovs-vswitchd.log])
AT_CHECK([grep -q "ct_dpif|DBG|.*ct_flush: zone 123" ovs-vswitchd.log])

OVS_VSWITCHD_STOP
AT_CLEANUP

0 comments on commit 817a765

Please sign in to comment.