Skip to content

Commit

Permalink
pf|ipfw|netinet6?: IP forwarding rework
Browse files Browse the repository at this point in the history
This removes the if_output calls in the pf(4) code that escape further
processing by deferring the forwarding execution to the network stack
using on/off style sysctls for both IPv4 and IPv6.

Also see: https://reviews.freebsd.org/D8877
  • Loading branch information
fichtner committed Nov 5, 2021
1 parent e511785 commit e3c5dc9
Show file tree
Hide file tree
Showing 12 changed files with 612 additions and 214 deletions.
61 changes: 27 additions & 34 deletions sys/netinet/ip_fastfwd.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,12 @@ ip_tryforward(struct mbuf *m)
struct mbuf *m0 = NULL;
struct nhop_object *nh = NULL;
struct route ro;
struct sockaddr_in *dst;
struct sockaddr_in *dst = NULL;
const struct sockaddr *gw;
struct in_addr dest, odest, rtdest;
uint16_t ip_len, ip_off;
int error = 0;
struct m_tag *fwd_tag = NULL;
struct ifnet *nifp = NULL;
struct mbuf *mcopy = NULL;
struct in_addr redest;
/*
Expand Down Expand Up @@ -345,23 +345,20 @@ ip_tryforward(struct mbuf *m)
/*
* Next hop forced by pfil(9) hook?
*/
if ((m->m_flags & M_IP_NEXTHOP) &&
((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
/*
* Now we will find route to forced destination.
*/
dest.s_addr = ((struct sockaddr_in *)
(fwd_tag + 1))->sin_addr.s_addr;
m_tag_delete(m, fwd_tag);
m->m_flags &= ~M_IP_NEXTHOP;
if (IP_HAS_NEXTHOP(m) && !ip_get_fwdtag(m, dst, &nifp)) {
dest.s_addr = dst->sin_addr.s_addr;
ip_flush_fwdtag(m);
}

/*
* Find route to destination.
*/
if (ip_findroute(&nh, dest, m) != 0)
if (!nifp && ip_findroute(&nh, dest, m) != 0)
return (NULL); /* icmp unreach already sent */

if (!nifp)
nifp = nh->nh_ifp;

/*
* Avoid second route lookup by caching destination.
*/
Expand All @@ -373,7 +370,7 @@ ip_tryforward(struct mbuf *m)
if (!PFIL_HOOKED_OUT(V_inet_pfil_head))
goto passout;

if (pfil_run_hooks(V_inet_pfil_head, &m, nh->nh_ifp,
if (pfil_run_hooks(V_inet_pfil_head, &m, nifp,
PFIL_OUT | PFIL_FWD, NULL) != PFIL_PASS)
goto drop;

Expand All @@ -386,11 +383,8 @@ ip_tryforward(struct mbuf *m)
/*
* Destination address changed?
*/
if (m->m_flags & M_IP_NEXTHOP)
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
else
fwd_tag = NULL;
if (odest.s_addr != dest.s_addr || fwd_tag != NULL) {
if (odest.s_addr != dest.s_addr || IP_HAS_NEXTHOP(m)) {
struct ifnet *nnifp = NULL;
/*
* Is it now for a local address on this host?
*/
Expand All @@ -405,15 +399,14 @@ ip_tryforward(struct mbuf *m)
/*
* Redo route lookup with new destination address
*/
if (fwd_tag) {
dest.s_addr = ((struct sockaddr_in *)
(fwd_tag + 1))->sin_addr.s_addr;
m_tag_delete(m, fwd_tag);
m->m_flags &= ~M_IP_NEXTHOP;
if (!ip_get_fwdtag(m, dst, &nnifp)) {
dest.s_addr = dst->sin_addr.s_addr;
ip_flush_fwdtag(m);
}
if (dest.s_addr != rtdest.s_addr &&
if (!nnifp && dest.s_addr != rtdest.s_addr &&
ip_findroute(&nh, dest, m) != 0)
return (NULL); /* icmp unreach already sent */
nifp = nnifp ? nnifp : nh->nh_ifp;
}

passout:
Expand All @@ -428,7 +421,7 @@ ip_tryforward(struct mbuf *m)
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr = dest;
if (nh->nh_flags & NHF_GATEWAY) {
if (nh && nh->nh_ifp == nifp && nh->nh_flags & NHF_GATEWAY) {
gw = &nh->gw_sa;
ro.ro_flags |= RT_HAS_GW;
} else
Expand All @@ -438,39 +431,39 @@ ip_tryforward(struct mbuf *m)
* Handle redirect case.
*/
redest.s_addr = 0;
if (V_ipsendredirects && (nh->nh_ifp == m->m_pkthdr.rcvif) &&
if (V_ipsendredirects && (nifp == m->m_pkthdr.rcvif) &&
gw->sa_family == AF_INET)
mcopy = ip_redir_alloc(m, nh, ip, &redest.s_addr);

/*
* Check if packet fits MTU or if hardware will fragment for us
*/
if (ip_len <= nh->nh_mtu) {
if (ip_len <= nifp->if_mtu) {
/*
* Avoid confusing lower layers.
*/
m_clrprotoflags(m);
/*
* Send off the packet via outgoing interface
*/
IP_PROBE(send, NULL, NULL, ip, nh->nh_ifp, ip, NULL);
error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, gw, &ro);
IP_PROBE(send, NULL, NULL, ip, nifp, ip, NULL);
error = (*nifp->if_output)(nifp, m, gw, &ro);
} else {
/*
* Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
*/
if (ip_off & IP_DF) {
IPSTAT_INC(ips_cantfrag);
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
0, nh->nh_mtu);
0, nifp->if_mtu);
goto consumed;
} else {
/*
* We have to fragment the packet
*/
m->m_pkthdr.csum_flags |= CSUM_IP;
if (ip_fragment(ip, &m, nh->nh_mtu,
nh->nh_ifp->if_hwassist) != 0)
if (ip_fragment(ip, &m, nifp->if_mtu,
nifp->if_hwassist) != 0)
goto drop;
KASSERT(m != NULL, ("null mbuf and no error"));
/*
Expand All @@ -486,9 +479,9 @@ ip_tryforward(struct mbuf *m)
m_clrprotoflags(m);

IP_PROBE(send, NULL, NULL,
mtod(m, struct ip *), nh->nh_ifp,
mtod(m, struct ip *), nifp,
mtod(m, struct ip *), NULL);
error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m,
error = (*nifp->if_output)(nifp, m,
gw, &ro);
if (error)
break;
Expand Down
116 changes: 107 additions & 9 deletions sys/netinet/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,9 @@ extern int in_mcast_loop;
extern struct protosw inetsw[];

static inline int
ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags,
ip_output_pfil(struct mbuf **mp, struct ifnet **ifp, int flags,
struct inpcb *inp, struct sockaddr_in *dst, int *fibnum, int *error)
{
struct m_tag *fwd_tag = NULL;
struct mbuf *m;
struct in_addr odst;
struct ip *ip;
Expand All @@ -127,7 +126,7 @@ ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags,

/* Run through list of hooks for output packets. */
odst.s_addr = ip->ip_dst.s_addr;
switch (pfil_run_hooks(V_inet_pfil_head, mp, ifp, pflags, inp)) {
switch (pfil_run_hooks(V_inet_pfil_head, mp, *ifp, pflags, inp)) {
case PFIL_DROPPED:
*error = EACCES;
/* FALLTHROUGH */
Expand Down Expand Up @@ -196,12 +195,9 @@ ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags,
return 1; /* Finished */
}
/* Or forward to some other address? */
if ((m->m_flags & M_IP_NEXTHOP) &&
((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
if (IP_HAS_NEXTHOP(m) && !ip_get_fwdtag(m, dst, ifp)) {
m->m_flags |= M_SKIP_FIREWALL;
m->m_flags &= ~M_IP_NEXTHOP;
m_tag_delete(m, fwd_tag);
ip_flush_fwdtag(m);

return -1; /* Reloop for CHANGE of dst */
}
Expand Down Expand Up @@ -696,7 +692,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,

/* Jump over all PFIL processing if hooks are not active. */
if (PFIL_HOOKED_OUT(V_inet_pfil_head)) {
switch (ip_output_pfil(&m, ifp, flags, inp, dst, &fibnum,
struct ifnet *oifp = ifp;
switch (ip_output_pfil(&m, &ifp, flags, inp, dst, &fibnum,
&error)) {
case 1: /* Finished */
goto done;
Expand All @@ -713,6 +710,18 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
}
gw = (const struct sockaddr *)dst;
ip = mtod(m, struct ip *);
if (oifp != ifp) {
/*
* pf(4) decided on interface so
* emulate the previous way of
* if_output() with new ifp/dst
* combo and no route set.
*/
IFP_TO_IA(ifp, ia, &in_ifa_tracker);
mtu = ifp->if_mtu;
ro = NULL;
break;
}
goto again;
}
}
Expand Down Expand Up @@ -1620,3 +1629,92 @@ ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen)
if_simloop(ifp, copym, AF_INET, 0);
}
}

/*
 * Payload of a PACKET_TAG_IPFORWARD mbuf tag as written by ip_set_fwdtag()
 * and read back by ip_get_fwdtag().  It is stored directly behind the
 * struct m_tag header (accessed as fwd_tag + 1).
 */
struct ip_fwdtag {
/* Forced next-hop address; ip_set_fwdtag() asserts AF_INET. */
struct sockaddr_in dst;
/* Index of the forced outgoing interface, or 0 when none was given. */
u_short if_index;
};

/*
 * Attach (or refresh) the IPv4 forwarding decision on an mbuf.
 *
 * The next-hop address `dst' and, optionally, the outgoing interface `ifp'
 * are stored in a PACKET_TAG_IPFORWARD tag and M_IP_NEXTHOP is set on the
 * mbuf.  M_FASTFWD_OURS is set when the next hop is one of our own
 * addresses (in_localip()) and cleared otherwise.
 *
 * An already attached tag is reused only when its payload is large enough
 * to hold a struct ip_fwdtag.  A shorter tag is replaced by a freshly
 * allocated one instead of being written past its end.
 *
 * Parameters:
 *	m	packet to tag
 *	dst	next-hop address, must be non-NULL and AF_INET
 *	ifp	outgoing interface to force, or NULL for none
 *
 * Returns 0 on success or ENOBUFS if a tag could not be allocated; on
 * failure the mbuf's tags and flags are left untouched.
 */
int
ip_set_fwdtag(struct mbuf *m, struct sockaddr_in *dst, struct ifnet *ifp)
{
	struct ip_fwdtag *fwd_info;
	struct m_tag *fwd_tag, *old_tag;

	KASSERT(dst != NULL, ("%s: !dst", __func__));
	KASSERT(dst->sin_family == AF_INET, ("%s: !AF_INET", __func__));

	old_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
	if (old_tag != NULL && old_tag->m_tag_len >= sizeof(*fwd_info)) {
		KASSERT(((struct ip_fwdtag *)(old_tag+1))->dst.sin_family ==
		    AF_INET, ("%s: !AF_INET", __func__));

		/* Reuse the tag in place; it is re-linked at the end. */
		m_tag_unlink(m, old_tag);
		fwd_tag = old_tag;
	} else {
		/*
		 * Either no tag exists yet or the existing one is too
		 * small for our payload.  Allocate first so that a failed
		 * allocation leaves the mbuf exactly as it was.
		 */
		fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, sizeof(*fwd_info),
		    M_NOWAIT);
		if (fwd_tag == NULL)
			return (ENOBUFS);
		if (old_tag != NULL)
			m_tag_delete(m, old_tag);
	}

	fwd_info = (struct ip_fwdtag *)(fwd_tag+1);

	bcopy(dst, &fwd_info->dst, sizeof(fwd_info->dst));
	/* An index of 0 tells ip_get_fwdtag() that no interface was forced. */
	fwd_info->if_index = ifp ? ifp->if_index : 0;
	m->m_flags |= M_IP_NEXTHOP;

	if (in_localip(fwd_info->dst.sin_addr))
		m->m_flags |= M_FASTFWD_OURS;
	else
		m->m_flags &= ~M_FASTFWD_OURS;

	m_tag_prepend(m, fwd_tag);

	return (0);
}

int
ip_get_fwdtag(struct mbuf *m, struct sockaddr_in *dst, struct ifnet **ifp)
{
struct ip_fwdtag *fwd_info;
struct m_tag *fwd_tag;

fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
if (fwd_tag == NULL) {
return (ENOENT);
}

fwd_info = (struct ip_fwdtag *)(fwd_tag+1);

KASSERT(((struct sockaddr *)&fwd_info->dst)->sa_family == AF_INET,
("%s: !AF_INET", __func__));

if (dst != NULL) {
bcopy(&fwd_info->dst, dst, sizeof(*dst));
}

if (ifp != NULL && fwd_info->if_index != 0) {
struct ifnet *nifp = ifnet_byindex(fwd_info->if_index);
if (nifp != NULL) {
*ifp = nifp;
}
}

return (0);
}

/*
 * Drop the forwarding decision from an mbuf.
 *
 * When a PACKET_TAG_IPFORWARD tag is attached, clear both M_IP_NEXTHOP and
 * M_FASTFWD_OURS and delete the tag.  Without a tag the mbuf, including
 * its flags, is left as is.
 */
void
ip_flush_fwdtag(struct mbuf *m)
{
	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);

	if (tag == NULL)
		return;

	KASSERT(((struct sockaddr *)(tag+1))->sa_family ==
	    AF_INET, ("%s: !AF_INET", __func__));

	/* Clear the flags before the tag goes away. */
	m->m_flags &= ~(M_IP_NEXTHOP | M_FASTFWD_OURS);
	m_tag_delete(m, tag);
}
5 changes: 5 additions & 0 deletions sys/netinet/ip_var.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,11 @@ extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
extern void (*ip_rsvp_force_done)(struct socket *);
extern int (*rsvp_input_p)(struct mbuf **, int *, int);

/* Non-zero when the mbuf has the M_IP_NEXTHOP flag set. */
#define IP_HAS_NEXTHOP(m) ((m)->m_flags & M_IP_NEXTHOP)
/*
 * Store next hop (and optional outgoing interface) in a
 * PACKET_TAG_IPFORWARD tag; returns 0 or ENOBUFS if allocation fails.
 */
int ip_set_fwdtag(struct mbuf *, struct sockaddr_in *, struct ifnet *);
/*
 * Fetch the stored next hop / interface; either output pointer may be
 * NULL.  Returns 0 on success, an errno value otherwise (ENOENT when no
 * tag is attached).
 */
int ip_get_fwdtag(struct mbuf *, struct sockaddr_in *, struct ifnet **);
/* Delete the tag, if present, and clear M_IP_NEXTHOP and M_FASTFWD_OURS. */
void ip_flush_fwdtag(struct mbuf *);

VNET_DECLARE(struct pfil_head *, inet_pfil_head);
#define V_inet_pfil_head VNET(inet_pfil_head)
#define PFIL_INET_NAME "inet"
Expand Down
Loading

0 comments on commit e3c5dc9

Please sign in to comment.