Skip to content

Commit

Permalink
netfilter: add netfilter hooks to SRv6 data plane
Browse files Browse the repository at this point in the history
This patch introduces netfilter hooks for solving the problem that
conntrack couldn't record both inner flows and outer flows.

This patch also introduces a new sysctl toggle for enabling lightweight
tunnel netfilter hooks.

Signed-off-by: Ryoga Saito <contact@proelbtn.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  • Loading branch information
proelbtn authored and ummakynes committed Aug 29, 2021
1 parent 7bc416f commit 7a3f5b0
Show file tree
Hide file tree
Showing 9 changed files with 241 additions and 36 deletions.
7 changes: 7 additions & 0 deletions Documentation/networking/nf_conntrack-sysctl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,13 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds)
This extended timeout will be used in case there is an GRE stream
detected.

nf_hooks_lwtunnel - BOOLEAN
- 0 - disabled (default)
- not 0 - enabled

If this option is enabled, the lightweight tunnel netfilter hooks are
enabled. This option cannot be disabled once it is enabled.

nf_flowtable_tcp_timeout - INTEGER (seconds)
default 30

Expand Down
3 changes: 3 additions & 0 deletions include/net/lwtunnel.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ struct lwtunnel_encap_ops {
};

#ifdef CONFIG_LWTUNNEL

DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);

void lwtstate_free(struct lwtunnel_state *lws);

static inline struct lwtunnel_state *
Expand Down
7 changes: 7 additions & 0 deletions include/net/netfilter/nf_hooks_lwtunnel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#include <linux/sysctl.h>
#include <linux/types.h>

#ifdef CONFIG_SYSCTL
int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
#endif
3 changes: 3 additions & 0 deletions net/core/lwtunnel.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
#include <net/ip6_fib.h>
#include <net/rtnh.h>

DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);

#ifdef CONFIG_MODULES

static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
Expand Down
75 changes: 72 additions & 3 deletions net/ipv6/seg6_iptunnel.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
#include <net/lwtunnel.h>
#include <linux/netfilter.h>

static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
Expand Down Expand Up @@ -295,11 +297,19 @@ static int seg6_do_srh(struct sk_buff *skb)

ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);

return 0;
}

static int seg6_input(struct sk_buff *skb)
static int seg6_input_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
return dst_input(skb);
}

static int seg6_input_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
Expand Down Expand Up @@ -337,10 +347,41 @@ static int seg6_input(struct sk_buff *skb)
if (unlikely(err))
return err;

return dst_input(skb);
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, seg6_input_finish);

return seg6_input_finish(dev_net(skb->dev), NULL, skb);
}

static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
static int seg6_input_nf(struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct net *net = dev_net(skb->dev);

switch (skb->protocol) {
case htons(ETH_P_IP):
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
skb, NULL, dev, seg6_input_core);
case htons(ETH_P_IPV6):
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
skb, NULL, dev, seg6_input_core);
}

return -EINVAL;
}

static int seg6_input(struct sk_buff *skb)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return seg6_input_nf(skb);

return seg6_input_core(dev_net(skb->dev), NULL, skb);
}

static int seg6_output_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
Expand Down Expand Up @@ -387,12 +428,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(err))
goto drop;

if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, skb_dst(skb)->dev, dst_output);

return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
return err;
}

static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;

switch (skb->protocol) {
case htons(ETH_P_IP):
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
NULL, dev, seg6_output_core);
case htons(ETH_P_IPV6):
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
NULL, dev, seg6_output_core);
}

return -EINVAL;
}

static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return seg6_output_nf(net, sk, skb);

return seg6_output_core(net, sk, skb);
}

static int seg6_build_state(struct net *net, struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts,
Expand Down
111 changes: 78 additions & 33 deletions net/ipv6/seg6_local.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
#include <net/lwtunnel.h>
#include <linux/netfilter.h>

#define SEG6_F_ATTR(i) BIT(i)

Expand Down Expand Up @@ -413,12 +415,33 @@ static int input_action_end_dx2(struct sk_buff *skb,
return -EINVAL;
}

static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct in6_addr *nhaddr = NULL;
struct seg6_local_lwt *slwt;

slwt = seg6_local_lwtunnel(orig_dst->lwtstate);

/* The inner packet is not associated to any local interface,
* so we do not call netif_rx().
*
* If slwt->nh6 is set to ::, then lookup the nexthop for the
* inner packet's DA. Otherwise, use the specified nexthop.
*/
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;

seg6_lookup_nexthop(skb, nhaddr, 0);

return dst_input(skb);
}

/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct in6_addr *nhaddr = NULL;

/* this function accepts IPv6 encapsulated packets, with either
* an SRH with SL=0, or no SRH.
*/
Expand All @@ -429,55 +452,65 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;

/* The inner packet is not associated to any local interface,
* so we do not call netif_rx().
*
* If slwt->nh6 is set to ::, then lookup the nexthop for the
* inner packet's DA. Otherwise, use the specified nexthop.
*/

if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;

skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);

seg6_lookup_nexthop(skb, nhaddr, 0);
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, input_action_end_dx6_finish);

return dst_input(skb);
return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
}

static int input_action_end_dx4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_local_lwt *slwt;
struct iphdr *iph;
__be32 nhaddr;
int err;

if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;

if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;

skb->protocol = htons(ETH_P_IP);
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);

iph = ip_hdr(skb);

nhaddr = slwt->nh4.s_addr ?: iph->daddr;

skb_dst_drop(skb);

skb_set_transport_header(skb, sizeof(struct iphdr));

err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
if (err)
goto drop;
if (err) {
kfree_skb(skb);
return -EINVAL;
}

return dst_input(skb);
}

static int input_action_end_dx4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;

if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;

skb->protocol = htons(ETH_P_IP);
skb_set_transport_header(skb, sizeof(struct iphdr));
nf_reset_ct(skb);

if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, input_action_end_dx4_finish);

return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
Expand Down Expand Up @@ -645,6 +678,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
skb_dst_drop(skb);

skb_set_transport_header(skb, hdrlen);
nf_reset_ct(skb);

return end_dt_vrf_rcv(skb, family, vrf);

Expand Down Expand Up @@ -1078,19 +1112,15 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
u64_stats_update_end(&pcounters->syncp);
}

static int seg6_local_input(struct sk_buff *skb)
static int seg6_local_input_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_action_desc *desc;
struct seg6_local_lwt *slwt;
unsigned int len = skb->len;
int rc;

if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
return -EINVAL;
}

slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc;

Expand All @@ -1104,6 +1134,21 @@ static int seg6_local_input(struct sk_buff *skb)
return rc;
}

static int seg6_local_input(struct sk_buff *skb)
{
if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
return -EINVAL;
}

if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
seg6_local_input_core);

return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
}

static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
Expand Down
3 changes: 3 additions & 0 deletions net/netfilter/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/

# IPVS
obj-$(CONFIG_IP_VS) += ipvs/

# lwtunnel
obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o
15 changes: 15 additions & 0 deletions net/netfilter/nf_conntrack_standalone.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
#ifdef CONFIG_LWTUNNEL
#include <net/netfilter/nf_hooks_lwtunnel.h>
#endif
#include <linux/rculist_nulls.h>

static bool enable_hooks __read_mostly;
Expand Down Expand Up @@ -612,6 +615,9 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
#endif
#ifdef CONFIG_LWTUNNEL
NF_SYSCTL_CT_LWTUNNEL,
#endif

__NF_SYSCTL_CT_LAST_SYSCTL,
};
Expand Down Expand Up @@ -958,6 +964,15 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
#endif
#ifdef CONFIG_LWTUNNEL
[NF_SYSCTL_CT_LWTUNNEL] = {
.procname = "nf_hooks_lwtunnel",
.data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = nf_hooks_lwtunnel_sysctl_handler,
},
#endif
{}
};
Expand Down
Loading

0 comments on commit 7a3f5b0

Please sign in to comment.