From e3c5dc9e3162483d43aaf44a3d17ee444cbdd178 Mon Sep 17 00:00:00 2001 From: Franco Fichtner Date: Wed, 18 Aug 2021 12:29:13 +0200 Subject: [PATCH] pf|ipfw|netinet6?: IP forwarding rework This removes the if_output calls in the pf(4) code that escape further processing by deferring the forwarding execution to the network stack using on/off style sysctls for both IPv4 and IPv6. Also see: https://reviews.freebsd.org/D8877 --- sys/netinet/ip_fastfwd.c | 61 ++++----- sys/netinet/ip_output.c | 116 +++++++++++++++-- sys/netinet/ip_var.h | 5 + sys/netinet/tcp_input.c | 42 ++---- sys/netinet/udp_usrreq.c | 17 +-- sys/netinet6/ip6_fastfwd.c | 73 +++++------ sys/netinet6/ip6_forward.c | 39 +++--- sys/netinet6/ip6_output.c | 139 ++++++++++++++++++-- sys/netinet6/ip6_var.h | 5 + sys/netinet6/udp6_usrreq.c | 17 +-- sys/netpfil/ipfw/ip_fw_pfil.c | 74 ++++------- sys/netpfil/pf/pf.c | 238 +++++++++++++++++++++++++++++++++- 12 files changed, 612 insertions(+), 214 deletions(-) diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index facf876f18cc..e5a1ba4a64d4 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -200,12 +200,12 @@ ip_tryforward(struct mbuf *m) struct mbuf *m0 = NULL; struct nhop_object *nh = NULL; struct route ro; - struct sockaddr_in *dst; + struct sockaddr_in *dst = (struct sockaddr_in *)&ro.ro_dst; const struct sockaddr *gw; struct in_addr dest, odest, rtdest; uint16_t ip_len, ip_off; int error = 0; - struct m_tag *fwd_tag = NULL; + struct ifnet *nifp = NULL; struct mbuf *mcopy = NULL; struct in_addr redest; /* @@ -345,23 +345,20 @@ ip_tryforward(struct mbuf *m) /* * Next hop forced by pfil(9) hook? */ - if ((m->m_flags & M_IP_NEXTHOP) && - ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) { - /* - * Now we will find route to forced destination. 
- */ - dest.s_addr = ((struct sockaddr_in *) - (fwd_tag + 1))->sin_addr.s_addr; - m_tag_delete(m, fwd_tag); - m->m_flags &= ~M_IP_NEXTHOP; + if (IP_HAS_NEXTHOP(m) && !ip_get_fwdtag(m, dst, &nifp)) { + dest.s_addr = dst->sin_addr.s_addr; + ip_flush_fwdtag(m); } /* * Find route to destination. */ - if (ip_findroute(&nh, dest, m) != 0) + if (!nifp && ip_findroute(&nh, dest, m) != 0) return (NULL); /* icmp unreach already sent */ + if (!nifp) + nifp = nh->nh_ifp; + /* * Avoid second route lookup by caching destination. */ @@ -373,7 +370,7 @@ ip_tryforward(struct mbuf *m) if (!PFIL_HOOKED_OUT(V_inet_pfil_head)) goto passout; - if (pfil_run_hooks(V_inet_pfil_head, &m, nh->nh_ifp, + if (pfil_run_hooks(V_inet_pfil_head, &m, nifp, PFIL_OUT | PFIL_FWD, NULL) != PFIL_PASS) goto drop; @@ -386,11 +383,8 @@ ip_tryforward(struct mbuf *m) /* * Destination address changed? */ - if (m->m_flags & M_IP_NEXTHOP) - fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); - else - fwd_tag = NULL; - if (odest.s_addr != dest.s_addr || fwd_tag != NULL) { + if (odest.s_addr != dest.s_addr || IP_HAS_NEXTHOP(m)) { + struct ifnet *nnifp = NULL; /* * Is it now for a local address on this host? */ @@ -405,15 +399,14 @@ ip_tryforward(struct mbuf *m) /* * Redo route lookup with new destination address */ - if (fwd_tag) { - dest.s_addr = ((struct sockaddr_in *) - (fwd_tag + 1))->sin_addr.s_addr; - m_tag_delete(m, fwd_tag); - m->m_flags &= ~M_IP_NEXTHOP; + if (!ip_get_fwdtag(m, dst, &nnifp)) { + dest.s_addr = dst->sin_addr.s_addr; + ip_flush_fwdtag(m); } - if (dest.s_addr != rtdest.s_addr && + if (!nnifp && dest.s_addr != rtdest.s_addr && ip_findroute(&nh, dest, m) != 0) return (NULL); /* icmp unreach already sent */ + nifp = nnifp ? 
nnifp : nh->nh_ifp; } passout: @@ -428,7 +421,7 @@ ip_tryforward(struct mbuf *m) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = dest; - if (nh->nh_flags & NHF_GATEWAY) { + if (nh && nh->nh_ifp == nifp && nh->nh_flags & NHF_GATEWAY) { gw = &nh->gw_sa; ro.ro_flags |= RT_HAS_GW; } else @@ -438,14 +431,14 @@ ip_tryforward(struct mbuf *m) * Handle redirect case. */ redest.s_addr = 0; - if (V_ipsendredirects && (nh->nh_ifp == m->m_pkthdr.rcvif) && + if (V_ipsendredirects && (nifp == m->m_pkthdr.rcvif) && gw->sa_family == AF_INET) mcopy = ip_redir_alloc(m, nh, ip, &redest.s_addr); /* * Check if packet fits MTU or if hardware will fragment for us */ - if (ip_len <= nh->nh_mtu) { + if (ip_len <= nifp->if_mtu) { /* * Avoid confusing lower layers. */ @@ -453,8 +446,8 @@ ip_tryforward(struct mbuf *m) /* * Send off the packet via outgoing interface */ - IP_PROBE(send, NULL, NULL, ip, nh->nh_ifp, ip, NULL); - error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, gw, &ro); + IP_PROBE(send, NULL, NULL, ip, nifp, ip, NULL); + error = (*nifp->if_output)(nifp, m, gw, &ro); } else { /* * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery @@ -462,15 +455,15 @@ ip_tryforward(struct mbuf *m) if (ip_off & IP_DF) { IPSTAT_INC(ips_cantfrag); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, - 0, nh->nh_mtu); + 0, nifp->if_mtu); goto consumed; } else { /* * We have to fragment the packet */ m->m_pkthdr.csum_flags |= CSUM_IP; - if (ip_fragment(ip, &m, nh->nh_mtu, - nh->nh_ifp->if_hwassist) != 0) + if (ip_fragment(ip, &m, nifp->if_mtu, + nifp->if_hwassist) != 0) goto drop; KASSERT(m != NULL, ("null mbuf and no error")); /* @@ -486,9 +479,9 @@ ip_tryforward(struct mbuf *m) m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, - mtod(m, struct ip *), nh->nh_ifp, + mtod(m, struct ip *), nifp, mtod(m, struct ip *), NULL); - error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, + error = (*nifp->if_output)(nifp, m, gw, &ro); if (error) break; diff --git a/sys/netinet/ip_output.c 
b/sys/netinet/ip_output.c index c269fca42015..1396fa6f7aa5 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -110,10 +110,9 @@ extern int in_mcast_loop; extern struct protosw inetsw[]; static inline int -ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags, +ip_output_pfil(struct mbuf **mp, struct ifnet **ifp, int flags, struct inpcb *inp, struct sockaddr_in *dst, int *fibnum, int *error) { - struct m_tag *fwd_tag = NULL; struct mbuf *m; struct in_addr odst; struct ip *ip; @@ -127,7 +126,7 @@ ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags, /* Run through list of hooks for output packets. */ odst.s_addr = ip->ip_dst.s_addr; - switch (pfil_run_hooks(V_inet_pfil_head, mp, ifp, pflags, inp)) { + switch (pfil_run_hooks(V_inet_pfil_head, mp, *ifp, pflags, inp)) { case PFIL_DROPPED: *error = EACCES; /* FALLTHROUGH */ @@ -196,12 +195,9 @@ ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags, return 1; /* Finished */ } /* Or forward to some other address? */ - if ((m->m_flags & M_IP_NEXTHOP) && - ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) { - bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); + if (IP_HAS_NEXTHOP(m) && !ip_get_fwdtag(m, dst, ifp)) { m->m_flags |= M_SKIP_FIREWALL; - m->m_flags &= ~M_IP_NEXTHOP; - m_tag_delete(m, fwd_tag); + ip_flush_fwdtag(m); return -1; /* Reloop for CHANGE of dst */ } @@ -696,7 +692,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, /* Jump over all PFIL processing if hooks are not active. 
*/ if (PFIL_HOOKED_OUT(V_inet_pfil_head)) { - switch (ip_output_pfil(&m, ifp, flags, inp, dst, &fibnum, + struct ifnet *oifp = ifp; + switch (ip_output_pfil(&m, &ifp, flags, inp, dst, &fibnum, &error)) { case 1: /* Finished */ goto done; @@ -713,6 +710,18 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, } gw = (const struct sockaddr *)dst; ip = mtod(m, struct ip *); + if (oifp != ifp) { + /* + * pf(4) decided on interface so + * emulate the previous way of + * if_output() with new ifp/dst + * combo and no route set. + */ + IFP_TO_IA(ifp, ia, &in_ifa_tracker); + mtu = ifp->if_mtu; + ro = NULL; + break; + } goto again; } } @@ -1620,3 +1629,92 @@ ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen) if_simloop(ifp, copym, AF_INET, 0); } } + +struct ip_fwdtag { + struct sockaddr_in dst; + u_short if_index; +}; + +int +ip_set_fwdtag(struct mbuf *m, struct sockaddr_in *dst, struct ifnet *ifp) +{ + struct ip_fwdtag *fwd_info; + struct m_tag *fwd_tag; + + KASSERT(dst != NULL, ("%s: !dst", __func__)); + KASSERT(dst->sin_family == AF_INET, ("%s: !AF_INET", __func__)); + + fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); + if (fwd_tag != NULL) { + KASSERT(((struct ip_fwdtag *)(fwd_tag+1))->dst.sin_family == + AF_INET, ("%s: !AF_INET", __func__)); + + m_tag_unlink(m, fwd_tag); + } else { + fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, sizeof(*fwd_info), + M_NOWAIT); + if (fwd_tag == NULL) { + return (ENOBUFS); + } + } + + fwd_info = (struct ip_fwdtag *)(fwd_tag+1); + + bcopy(dst, &fwd_info->dst, sizeof(fwd_info->dst)); + fwd_info->if_index = ifp ? 
ifp->if_index : 0; + m->m_flags |= M_IP_NEXTHOP; + + if (in_localip(fwd_info->dst.sin_addr)) + m->m_flags |= M_FASTFWD_OURS; + else + m->m_flags &= ~M_FASTFWD_OURS; + + m_tag_prepend(m, fwd_tag); + + return (0); +} + +int +ip_get_fwdtag(struct mbuf *m, struct sockaddr_in *dst, struct ifnet **ifp) +{ + struct ip_fwdtag *fwd_info; + struct m_tag *fwd_tag; + + fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); + if (fwd_tag == NULL) { + return (ENOENT); + } + + fwd_info = (struct ip_fwdtag *)(fwd_tag+1); + + KASSERT(((struct sockaddr *)&fwd_info->dst)->sa_family == AF_INET, + ("%s: !AF_INET", __func__)); + + if (dst != NULL) { + bcopy(&fwd_info->dst, dst, sizeof(*dst)); + } + + if (ifp != NULL && fwd_info->if_index != 0) { + struct ifnet *nifp = ifnet_byindex(fwd_info->if_index); + if (nifp != NULL) { + *ifp = nifp; + } + } + + return (0); +} + +void +ip_flush_fwdtag(struct mbuf *m) +{ + struct m_tag *fwd_tag; + + fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); + if (fwd_tag != NULL) { + KASSERT(((struct sockaddr *)(fwd_tag+1))->sa_family == + AF_INET, ("%s: !AF_INET", __func__)); + + m->m_flags &= ~(M_IP_NEXTHOP | M_FASTFWD_OURS); + m_tag_delete(m, fwd_tag); + } +} diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 77b6ee88507a..2ca24bafe48c 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -244,6 +244,11 @@ extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *); extern void (*ip_rsvp_force_done)(struct socket *); extern int (*rsvp_input_p)(struct mbuf **, int *, int); +#define IP_HAS_NEXTHOP(m) ((m)->m_flags & M_IP_NEXTHOP) +int ip_set_fwdtag(struct mbuf *, struct sockaddr_in *, struct ifnet *); +int ip_get_fwdtag(struct mbuf *, struct sockaddr_in *, struct ifnet **); +void ip_flush_fwdtag(struct mbuf *); + VNET_DECLARE(struct pfil_head *, inet_pfil_head); #define V_inet_pfil_head VNET(inet_pfil_head) #define PFIL_INET_NAME "inet" diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index f7ca4ce05200..de70d59418ef 
100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -634,6 +634,7 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) int off0; int optlen = 0; #ifdef INET + struct sockaddr_in next_hop; int len; uint8_t ipttl; #endif @@ -642,8 +643,8 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) int thflags; int rstreason = 0; /* For badport_bandlim accounting purposes */ uint8_t iptos; - struct m_tag *fwd_tag = NULL; #ifdef INET6 + struct sockaddr_in6 next_hop6; struct ip6_hdr *ip6 = NULL; int isipv6; #else @@ -828,28 +829,10 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) */ drop_hdrlen = off0 + off; - /* - * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. - */ - if ( -#ifdef INET6 - (isipv6 && (m->m_flags & M_IP6_NEXTHOP)) -#ifdef INET - || (!isipv6 && (m->m_flags & M_IP_NEXTHOP)) -#endif -#endif -#if defined(INET) && !defined(INET6) - (m->m_flags & M_IP_NEXTHOP) -#endif - ) - fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); - findpcb: #ifdef INET6 - if (isipv6 && fwd_tag != NULL) { - struct sockaddr_in6 *next_hop6; - - next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); + if (isipv6 && IP6_HAS_NEXTHOP(m) && + !ip6_get_fwdtag(m, &next_hop6, NULL)) { /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? @@ -864,11 +847,13 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) * any hardware-generated hash is ignored. */ inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src, - th->th_sport, &next_hop6->sin6_addr, - next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) : + th->th_sport, &next_hop6.sin6_addr, + next_hop6.sin6_port ? ntohs(next_hop6.sin6_port) : th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); } + /* Remove the tag from the packet. We don't need it anymore. 
*/ + ip6_flush_fwdtag(m); } else if (isipv6) { inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, @@ -880,10 +865,7 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) else #endif #ifdef INET - if (fwd_tag != NULL) { - struct sockaddr_in *next_hop; - - next_hop = (struct sockaddr_in *)(fwd_tag+1); + if (IP_HAS_NEXTHOP(m) && !ip_get_fwdtag(m, &next_hop, NULL)) { /* * Transparently forwarded. Pretend to be the destination. * already got one like this? @@ -898,11 +880,13 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) * any hardware-generated hash is ignored. */ inp = in_pcblookup(&V_tcbinfo, ip->ip_src, - th->th_sport, next_hop->sin_addr, - next_hop->sin_port ? ntohs(next_hop->sin_port) : + th->th_sport, next_hop.sin_addr, + next_hop.sin_port ? ntohs(next_hop.sin_port) : th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); } + /* Remove the tag from the packet. We don't need it anymore. */ + ip_flush_fwdtag(m); } else inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, ip->ip_dst, th->th_dport, diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 76ed063391eb..91af056db0a5 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -399,9 +399,8 @@ udp_input(struct mbuf **mp, int *offp, int proto) uint16_t len, ip_len; struct inpcbinfo *pcbinfo; struct ip save_ip; - struct sockaddr_in udp_in[2]; + struct sockaddr_in udp_in[2], next_hop; struct mbuf *m; - struct m_tag *fwd_tag; int cscov_partial, iphlen; m = *mp; @@ -660,12 +659,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. 
*/ - if ((m->m_flags & M_IP_NEXTHOP) && - (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { - struct sockaddr_in *next_hop; - - next_hop = (struct sockaddr_in *)(fwd_tag + 1); - + if (IP_HAS_NEXTHOP(m) && !ip_get_fwdtag(m, &next_hop, NULL)) { /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? @@ -679,14 +673,13 @@ udp_input(struct mbuf **mp, int *offp, int proto) * any hardware-generated hash is ignored. */ inp = in_pcblookup(pcbinfo, ip->ip_src, - uh->uh_sport, next_hop->sin_addr, - next_hop->sin_port ? htons(next_hop->sin_port) : + uh->uh_sport, next_hop.sin_addr, + next_hop.sin_port ? htons(next_hop.sin_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp); } /* Remove the tag from the packet. We don't need it anymore. */ - m_tag_delete(m, fwd_tag); - m->m_flags &= ~M_IP_NEXTHOP; + ip_flush_fwdtag(m); } else inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | diff --git a/sys/netinet6/ip6_fastfwd.c b/sys/netinet6/ip6_fastfwd.c index b6a8af8013c4..1043b07a6127 100644 --- a/sys/netinet6/ip6_fastfwd.c +++ b/sys/netinet6/ip6_fastfwd.c @@ -92,12 +92,11 @@ struct mbuf* ip6_tryforward(struct mbuf *m) { struct sockaddr_in6 dst; - struct nhop_object *nh; - struct m_tag *fwd_tag; + struct nhop_object *nh = NULL; struct ip6_hdr *ip6; - struct ifnet *rcvif; + struct ifnet *rcvif, *nifp = NULL; uint32_t plen; - int error; + int error, mtu = 0; /* * Fallback conditions to ip6_input for slow path processing. @@ -172,22 +171,16 @@ ip6_tryforward(struct mbuf *m) * that new destination or next hop is our local address. * So, we can just go back to ip6_input. * XXX: should we decrement ip6_hlim in such case? - * - * Also it can forward packet to another destination, e.g. - * M_IP6_NEXTHOP flag is set and fwd_tag is attached to mbuf. 
*/ if (m->m_flags & M_FASTFWD_OURS) return (m); ip6 = mtod(m, struct ip6_hdr *); - if ((m->m_flags & M_IP6_NEXTHOP) && - (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { + if (IP6_HAS_NEXTHOP(m) && !ip6_get_fwdtag(m, &dst, &nifp)) { /* * Now we will find route to forwarded by pfil destination. */ - bcopy((fwd_tag + 1), &dst, sizeof(dst)); - m->m_flags &= ~M_IP6_NEXTHOP; - m_tag_delete(m, fwd_tag); + ip6_flush_fwdtag(m); } else { /* Update dst since pfil could change it */ dst.sin6_addr = ip6->ip6_dst; @@ -196,15 +189,18 @@ ip6_tryforward(struct mbuf *m) /* * Find route to destination. */ - if (ip6_findroute(&nh, &dst, m) != 0) { + if (!nifp && ip6_findroute(&nh, &dst, m) != 0) { m = NULL; in6_ifstat_inc(rcvif, ifs6_in_noroute); goto dropin; } + if (!nifp) + nifp = nh->nh_ifp; + mtu = IN6_LINKMTU(nifp); if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) { - if (m->m_pkthdr.len > nh->nh_mtu) { - in6_ifstat_inc(nh->nh_ifp, ifs6_in_toobig); - icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, nh->nh_mtu); + if (m->m_pkthdr.len > mtu) { + in6_ifstat_inc(nifp, ifs6_in_toobig); + icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, mtu); m = NULL; goto dropout; } @@ -214,17 +210,19 @@ ip6_tryforward(struct mbuf *m) /* * Outgoing packet firewall processing. */ - if (pfil_run_hooks(V_inet6_pfil_head, &m, nh->nh_ifp, PFIL_OUT | + if (pfil_run_hooks(V_inet6_pfil_head, &m, nifp, PFIL_OUT | PFIL_FWD, NULL) != PFIL_PASS) goto dropout; /* * We used slow path processing for packets with scoped addresses. * So, scope checks aren't needed here. + * + * XXX this one is rather strange. Shouldn't we switch nh first? 
*/ - if (m->m_pkthdr.len > nh->nh_mtu) { - in6_ifstat_inc(nh->nh_ifp, ifs6_in_toobig); - icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, nh->nh_mtu); + if (m->m_pkthdr.len > mtu) { + in6_ifstat_inc(nifp, ifs6_in_toobig); + icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, mtu); m = NULL; goto dropout; } @@ -233,9 +231,6 @@ ip6_tryforward(struct mbuf *m) * If packet filter sets the M_FASTFWD_OURS flag, this means * that new destination or next hop is our local address. * So, we can just go back to ip6_input. - * - * Also it can forward packet to another destination, e.g. - * M_IP6_NEXTHOP flag is set and fwd_tag is attached to mbuf. */ if (m->m_flags & M_FASTFWD_OURS) { /* @@ -248,26 +243,21 @@ ip6_tryforward(struct mbuf *m) * Again. A packet filter could change the destination address. */ ip6 = mtod(m, struct ip6_hdr *); - if (m->m_flags & M_IP6_NEXTHOP) - fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); - else - fwd_tag = NULL; - - if (fwd_tag != NULL || + if (IP6_HAS_NEXTHOP(m) || !IN6_ARE_ADDR_EQUAL(&dst.sin6_addr, &ip6->ip6_dst)) { - if (fwd_tag != NULL) { - bcopy((fwd_tag + 1), &dst, sizeof(dst)); - m->m_flags &= ~M_IP6_NEXTHOP; - m_tag_delete(m, fwd_tag); - } else + struct ifnet *nnifp = NULL; + if (!ip6_get_fwdtag(m, &dst, &nnifp)) + ip6_flush_fwdtag(m); + else dst.sin6_addr = ip6->ip6_dst; /* * Redo route lookup with new destination address */ - if (ip6_findroute(&nh, &dst, m) != 0) { + if (!nnifp && ip6_findroute(&nh, &dst, m) != 0) { m = NULL; goto dropout; } + nifp = nnifp ? nnifp : nh->nh_ifp; } passout: #ifdef IPSTEALTH @@ -278,17 +268,16 @@ ip6_tryforward(struct mbuf *m) } m_clrprotoflags(m); /* Avoid confusing lower layers. 
*/ - IP_PROBE(send, NULL, NULL, ip6, nh->nh_ifp, NULL, ip6); + IP_PROBE(send, NULL, NULL, ip6, nifp, NULL, ip6); - if (nh->nh_flags & NHF_GATEWAY) + if (nh && nh->nh_ifp == nifp && nh->nh_flags & NHF_GATEWAY) dst.sin6_addr = nh->gw6_sa.sin6_addr; - error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, - (struct sockaddr *)&dst, NULL); + error = (*nifp->if_output)(nifp, m, (struct sockaddr *)&dst, NULL); if (error != 0) { - in6_ifstat_inc(nh->nh_ifp, ifs6_out_discard); + in6_ifstat_inc(nifp, ifs6_out_discard); IP6STAT_INC(ip6s_cantforward); } else { - in6_ifstat_inc(nh->nh_ifp, ifs6_out_forward); + in6_ifstat_inc(nifp, ifs6_out_forward); IP6STAT_INC(ip6s_forward); } return (NULL); @@ -296,7 +285,7 @@ ip6_tryforward(struct mbuf *m) in6_ifstat_inc(rcvif, ifs6_in_discard); goto drop; dropout: - in6_ifstat_inc(nh->nh_ifp, ifs6_out_discard); + in6_ifstat_inc(nifp, ifs6_out_discard); drop: if (m != NULL) m_freem(m); diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index d4306eea416f..64eb867160bb 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -92,14 +92,14 @@ void ip6_forward(struct mbuf *m, int srcrt) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - struct sockaddr_in6 dst; + struct sockaddr_in6 dst, gw6; struct nhop_object *nh = NULL; int error, type = 0, code = 0; struct mbuf *mcopy = NULL; struct ifnet *origifp; /* maybe unnecessary */ + struct ifnet *nifp = NULL; u_int32_t inzone, outzone; struct in6_addr odst; - struct m_tag *fwd_tag; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* @@ -352,30 +352,35 @@ ip6_forward(struct mbuf *m, int srcrt) goto out; } /* Or forward to some other address? */ - if ((m->m_flags & M_IP6_NEXTHOP) && - (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { - struct sockaddr_in6 *gw6 = (struct sockaddr_in6 *)(fwd_tag + 1); - + if (IP6_HAS_NEXTHOP(m) && !ip6_get_fwdtag(m, &gw6, &nifp)) { /* Update address and scopeid. 
Assume scope is embedded */ - dst.sin6_scope_id = ntohs(in6_getscope(&gw6->sin6_addr)); - dst.sin6_addr = gw6->sin6_addr; + dst.sin6_scope_id = ntohs(in6_getscope(&gw6.sin6_addr)); + dst.sin6_addr = gw6.sin6_addr; in6_clearscope(&dst.sin6_addr); m->m_flags |= M_SKIP_FIREWALL; - m->m_flags &= ~M_IP6_NEXTHOP; - m_tag_delete(m, fwd_tag); + ip6_flush_fwdtag(m); NH_FREE(nh); - goto again; + if (!nifp) + goto again; + /* XXX for safety but not sure */ + if ((nifp->if_flags & IFF_LOOPBACK) != 0) + origifp = m->m_pkthdr.rcvif; + else + origifp = nifp; } pass: + if (!nifp) + nifp = nh->nh_ifp; + /* See if the size was changed by the packet filter. */ /* TODO: change to nh->nh_mtu */ - if (m->m_pkthdr.len > IN6_LINKMTU(nh->nh_ifp)) { - in6_ifstat_inc(nh->nh_ifp, ifs6_in_toobig); + if (m->m_pkthdr.len > IN6_LINKMTU(nifp)) { + in6_ifstat_inc(nifp, ifs6_in_toobig); if (mcopy) icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, - IN6_LINKMTU(nh->nh_ifp)); + IN6_LINKMTU(nifp)); goto bad; } @@ -384,13 +389,13 @@ ip6_forward(struct mbuf *m, int srcrt) in6_set_unicast_scopeid(&dst.sin6_addr, dst.sin6_scope_id); dst.sin6_scope_id = 0; } - error = nd6_output_ifp(nh->nh_ifp, origifp, m, &dst, NULL); + error = nd6_output_ifp(nifp, origifp, m, &dst, NULL); if (error) { - in6_ifstat_inc(nh->nh_ifp, ifs6_out_discard); + in6_ifstat_inc(nifp, ifs6_out_discard); IP6STAT_INC(ip6s_cantforward); } else { IP6STAT_INC(ip6s_forward); - in6_ifstat_inc(nh->nh_ifp, ifs6_out_forward); + in6_ifstat_inc(nifp, ifs6_out_forward); if (type) IP6STAT_INC(ip6s_redirectsent); else { diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 2b49a9f7c351..71a755945959 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -426,6 +426,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, { struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; + struct ifnet *nifp = NULL; struct mbuf *m = m0; struct mbuf *mprev; struct route_in6 *ro_pmtu; @@ -447,7 +448,7 @@ ip6_output(struct mbuf *m0, 
struct ip6_pktopts *opt, int sw_csum, tso; int needfiblookup; uint32_t fibnum; - struct m_tag *fwd_tag = NULL; + int has_fwd_tag = 0; uint32_t id; NET_EPOCH_ASSERT(); @@ -693,13 +694,13 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, NH_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum); } - if (ro->ro_nh != NULL && fwd_tag == NULL && + if (ro->ro_nh != NULL && !has_fwd_tag && (!NH_IS_VALID(ro->ro_nh) || ro->ro_dst.sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst))) RO_INVALIDATE_CACHE(ro); - if (ro->ro_nh != NULL && fwd_tag == NULL && + if (ro->ro_nh != NULL && !has_fwd_tag && ro->ro_dst.sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) { nh = ro->ro_nh; @@ -708,7 +709,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = NULL; - if (fwd_tag == NULL) { + if (!has_fwd_tag) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); @@ -742,7 +743,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct in6_addr kdst; uint32_t scopeid; - if (fwd_tag == NULL) { + if (!has_fwd_tag) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); @@ -1088,17 +1089,31 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, goto done; } /* Or forward to some other address? 
*/ - if ((m->m_flags & M_IP6_NEXTHOP) && - (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { + if (IP6_HAS_NEXTHOP(m) && !ip6_get_fwdtag(m, &dst_sa, &nifp)) { if (ro != NULL) dst = (struct sockaddr_in6 *)&ro->ro_dst; else dst = &sin6; - bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; - m->m_flags &= ~M_IP6_NEXTHOP; - m_tag_delete(m, fwd_tag); - goto again; + ip6_flush_fwdtag(m); + has_fwd_tag = 1; + if (!nifp) + goto again; + /* XXX for safety but not sure */ + if ((nifp->if_flags & IFF_LOOPBACK) != 0) + origifp = m->m_pkthdr.rcvif; + else + origifp = nifp; + ifp = nifp; + /* + * pf(4) decided on interface so + * emulate the previous way of + * if_output() with new ifp/dst + * combo and no route set. + */ + ia = in6_ifawithifp(ifp, &ip6->ip6_src); + mtu = ifp->if_mtu; + ro = NULL; } passout: @@ -3374,3 +3389,105 @@ ip6_optlen(struct inpcb *inp) return len; #undef elen } + +struct ip6_fwdtag { + struct sockaddr_in6 dst; + u_short if_index; +}; + +int +ip6_set_fwdtag(struct mbuf *m, struct sockaddr_in6 *dst, struct ifnet *ifp) +{ + struct ip6_fwdtag *fwd_info; + struct m_tag *fwd_tag; + int error; + + KASSERT(dst != NULL, ("%s: !dst", __func__)); + KASSERT(dst->sin6_family == AF_INET6, ("%s: !AF_INET6", __func__)); + + fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); + if (fwd_tag != NULL) { + KASSERT(((struct ip6_fwdtag *)(fwd_tag+1))->dst.sin6_family == + AF_INET6, ("%s: !AF_INET6", __func__)); + + m_tag_unlink(m, fwd_tag); + } else { + fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, sizeof(*fwd_info), + M_NOWAIT); + if (fwd_tag == NULL) { + return (ENOBUFS); + } + } + + fwd_info = (struct ip6_fwdtag *)(fwd_tag+1); + + bcopy(dst, &fwd_info->dst, sizeof(fwd_info->dst)); + + /* + * If nh6 address is link-local we should convert + * it to kernel internal form before doing any + * comparisons. 
+ */ + error = sa6_embedscope(&fwd_info->dst, V_ip6_use_defzone); + if (error != 0) { + m_tag_free(fwd_tag); + return (error); + } + + if (in6_localip(&fwd_info->dst.sin6_addr)) + m->m_flags |= M_FASTFWD_OURS; + else + m->m_flags &= ~M_FASTFWD_OURS; + + fwd_info->if_index = ifp ? ifp->if_index : 0; + m->m_flags |= M_IP6_NEXTHOP; + + m_tag_prepend(m, fwd_tag); + + return (0); +} + +int +ip6_get_fwdtag(struct mbuf *m, struct sockaddr_in6 *dst, struct ifnet **ifp) +{ + struct ip6_fwdtag *fwd_info; + struct m_tag *fwd_tag; + + fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); + if (fwd_tag == NULL) { + return (ENOENT); + } + + fwd_info = (struct ip6_fwdtag *)(fwd_tag+1); + + KASSERT(((struct sockaddr *)&fwd_info->dst)->sa_family == AF_INET6, + ("%s: !AF_INET6", __func__)); + + if (dst != NULL) { + bcopy((fwd_tag+1), dst, sizeof(*dst)); + } + + if (ifp != NULL && fwd_info->if_index != 0) { + struct ifnet *nifp = ifnet_byindex(fwd_info->if_index); + if (nifp != NULL) { + *ifp = nifp; + } + } + + return (0); +} + +void +ip6_flush_fwdtag(struct mbuf *m) +{ + struct m_tag *fwd_tag; + + fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); + if (fwd_tag != NULL) { + KASSERT(((struct sockaddr *)(fwd_tag+1))->sa_family == + AF_INET6, ("%s: !AF_INET6", __func__)); + + m->m_flags &= ~(M_IP6_NEXTHOP | M_FASTFWD_OURS); + m_tag_delete(m, fwd_tag); + } +} diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index de7a938a3289..4cff071fc3d5 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -391,6 +391,11 @@ int ip6_deletefraghdr(struct mbuf *, int, int); int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int, uint32_t); +#define IP6_HAS_NEXTHOP(m) ((m)->m_flags & M_IP6_NEXTHOP) +int ip6_set_fwdtag(struct mbuf *, struct sockaddr_in6 *, struct ifnet *); +int ip6_get_fwdtag(struct mbuf *, struct sockaddr_in6 *, struct ifnet **); +void ip6_flush_fwdtag(struct mbuf *); + int route6_input(struct mbuf **, int *, int); void frag6_init(void); diff --git 
a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 816e3bdd2850..456586ea3f14 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -220,8 +220,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) int off = *offp; int cscov_partial; int plen, ulen; - struct sockaddr_in6 fromsa[2]; - struct m_tag *fwd_tag; + struct sockaddr_in6 fromsa[2], next_hop6; uint16_t uh_sum; uint8_t nxt; @@ -456,12 +455,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ - if ((m->m_flags & M_IP6_NEXTHOP) && - (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { - struct sockaddr_in6 *next_hop6; - - next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); - + if (IP6_HAS_NEXTHOP(m) && !ip6_get_fwdtag(m, &next_hop6, NULL)) { /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? @@ -476,14 +470,13 @@ udp6_input(struct mbuf **mp, int *offp, int proto) * any hardware-generated hash is ignored. */ inp = in6_pcblookup(pcbinfo, &ip6->ip6_src, - uh->uh_sport, &next_hop6->sin6_addr, - next_hop6->sin6_port ? htons(next_hop6->sin6_port) : + uh->uh_sport, &next_hop6.sin6_addr, + next_hop6.sin6_port ? htons(next_hop6.sin6_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif); } /* Remove the tag from the packet. We don't need it anymore. */ - m_tag_delete(m, fwd_tag); - m->m_flags &= ~M_IP6_NEXTHOP; + ip6_flush_fwdtag(m); } else inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, diff --git a/sys/netpfil/ipfw/ip_fw_pfil.c b/sys/netpfil/ipfw/ip_fw_pfil.c index 01a44df47180..9c68c6223b0c 100644 --- a/sys/netpfil/ipfw/ip_fw_pfil.c +++ b/sys/netpfil/ipfw/ip_fw_pfil.c @@ -129,6 +129,27 @@ ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int flags, int ipfw; args.flags = (flags & PFIL_IN) ? 
IPFW_ARGS_IN : IPFW_ARGS_OUT; + + /* restore the correct forwarding interface */ + if (args.flags & IPFW_ARGS_OUT) switch (mtod(*m0, struct ip *)->ip_v) { +#ifdef INET6 + case IPV6_VERSION >> 4: + if (IP6_HAS_NEXTHOP(*m0)) { + ip6_get_fwdtag(*m0, NULL, &ifp); + } + break; +#endif +#ifdef INET + case IPVERSION: + if (IP_HAS_NEXTHOP(*m0)) { + ip_get_fwdtag(*m0, NULL, &ifp); + } + break; +#endif + default: + break; + } + again: /* * extract and remove the tag if present. If we are left @@ -164,8 +185,6 @@ ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int flags, ret = PFIL_DROPPED; #else { - void *psa; - size_t len; #ifdef INET if (args.flags & (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR)) { MPASS((args.flags & (IPFW_ARGS_NH4 | @@ -173,12 +192,11 @@ ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int flags, IPFW_ARGS_NH4PTR)); MPASS((args.flags & (IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) == 0); - len = sizeof(struct sockaddr_in); - psa = (args.flags & IPFW_ARGS_NH4) ? - &args.hopstore : args.next_hop; - if (in_localip(satosin(psa)->sin_addr)) - (*m0)->m_flags |= M_FASTFWD_OURS; - (*m0)->m_flags |= M_IP_NEXTHOP; + if (ip_set_fwdtag(*m0, (args.flags & IPFW_ARGS_NH4) ? + &args.hopstore : args.next_hop, NULL)) { + ret = PFIL_DROPPED; + break; + } } #endif /* INET */ #ifdef INET6 @@ -188,38 +206,9 @@ ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int flags, IPFW_ARGS_NH6PTR)); MPASS((args.flags & (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR)) == 0); - len = sizeof(struct sockaddr_in6); - psa = args.next_hop6; - (*m0)->m_flags |= M_IP6_NEXTHOP; - } -#endif /* INET6 */ - /* - * Incoming packets should not be tagged so we do not - * m_tag_find. Outgoing packets may be tagged, so we - * reuse the tag if present. - */ - tag = (flags & PFIL_IN) ? 
NULL : - m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL); - if (tag != NULL) { - m_tag_unlink(*m0, tag); - } else { - tag = m_tag_get(PACKET_TAG_IPFORWARD, len, - M_NOWAIT); - if (tag == NULL) { - ret = PFIL_DROPPED; - break; - } - } - if ((args.flags & IPFW_ARGS_NH6) == 0) - bcopy(psa, tag + 1, len); - m_tag_prepend(*m0, tag); - ret = 0; -#ifdef INET6 - /* IPv6 next hop needs additional handling */ - if (args.flags & (IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) { struct sockaddr_in6 *sa6; - sa6 = satosin6(tag + 1); + sa6 = satosin6(args.next_hop); if (args.flags & IPFW_ARGS_NH6) { sa6->sin6_family = AF_INET6; sa6->sin6_len = sizeof(*sa6); @@ -228,17 +217,10 @@ ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int flags, sa6->sin6_scope_id = args.hopstore6.sin6_scope_id; } - /* - * If nh6 address is link-local we should convert - * it to kernel internal form before doing any - * comparisons. - */ - if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) { + if (ip6_set_fwdtag(*m0, sa6, NULL)) { ret = PFIL_DROPPED; break; } - if (in6_localip(&sa6->sin6_addr)) - (*m0)->m_flags |= M_FASTFWD_OURS; } #endif /* INET6 */ } diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 9c0d2f985d5c..c9f3a92ff5a6 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -317,6 +317,9 @@ static void pf_mtag_free(struct m_tag *); static void pf_route(struct mbuf **, struct pf_krule *, int, struct ifnet *, struct pf_kstate *, struct pf_pdesc *, struct inpcb *); +static void pf_route_shared(struct mbuf **, struct pf_krule *, + int, struct ifnet *, struct pf_kstate *, + struct pf_pdesc *, struct inpcb *); #endif /* INET */ #ifdef INET6 static void pf_change_a6(struct pf_addr *, u_int16_t *, @@ -324,6 +327,9 @@ static void pf_change_a6(struct pf_addr *, u_int16_t *, static void pf_route6(struct mbuf **, struct pf_krule *, int, struct ifnet *, struct pf_kstate *, struct pf_pdesc *, struct inpcb *); +static void pf_route6_shared(struct mbuf **, struct pf_krule *, + int, struct ifnet *, struct 
pf_kstate *, + struct pf_pdesc *, struct inpcb *); #endif /* INET6 */ static __inline void pf_set_protostate(struct pf_kstate *, int, u_int8_t); @@ -394,6 +400,20 @@ SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, SYSCTL_ULONG(_net_pf, OID_AUTO, request_maxcount, CTLFLAG_RWTUN, &pf_ioctl_maxcount, 0, "Maximum number of tables, addresses, ... in a single ioctl() call"); +VNET_DEFINE_STATIC(int, pf_share_forward) = 0; +VNET_DEFINE_STATIC(int, pf_share_forward6) = 0; + +#define V_pf_share_forward VNET(pf_share_forward) +#define V_pf_share_forward6 VNET(pf_share_forward6) + +SYSCTL_INT(_net_pf, OID_AUTO, share_forward, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pf_share_forward), 0, + "If set pf(4) will defer IPv4 forwarding to the network stack."); + +SYSCTL_INT(_net_pf, OID_AUTO, share_forward6, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pf_share_forward6), 0, + "If set pf(4) will defer IPv6 forwarding to the network stack."); + VNET_DEFINE(void *, pf_swi_cookie); VNET_DEFINE(struct intr_event *, pf_swi_ie); @@ -5957,6 +5977,101 @@ pf_route(struct mbuf **m, struct pf_krule *r, int dir, struct ifnet *oifp, m_freem(m0); goto done; } + +static void +pf_route_shared(struct mbuf **m, struct pf_krule *r, int dir, + struct ifnet *ifp, struct pf_kstate *s, struct pf_pdesc *pd, + struct inpcb *inp) +{ + struct mbuf *m0; + struct sockaddr_in dst; + struct ip *ip; + struct pf_addr naddr; + struct pf_ksrc_node *sn = NULL; + + KASSERT(m && *m && r && ifp, ("%s: invalid parameters", __func__)); + KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", + __func__)); + + if ((pd->pf_mtag == NULL && + ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || + pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad_locked; + } + + if (r->rt == PF_DUPTO) { + if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + m0 = *m; 
+ } + + /* retain old behaviour by avoiding a rewrite */ + if (IP_HAS_NEXTHOP(m0)) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + + ip = mtod(m0, struct ip *); + + bzero(&dst, sizeof(dst)); + dst.sin_family = AF_INET; + dst.sin_len = sizeof(dst); + dst.sin_addr = ip->ip_dst; + + bzero(&naddr, sizeof(naddr)); + + if (TAILQ_EMPTY(&r->rpool.list)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); + goto bad_locked; + } + if (s == NULL) { + pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, + &naddr, NULL, &sn); + if (!PF_AZERO(&naddr, AF_INET)) + dst.sin_addr.s_addr = naddr.v4.s_addr; + ifp = r->rpool.cur->kif ? + r->rpool.cur->kif->pfik_ifp : NULL; + } else { + if (!PF_AZERO(&s->rt_addr, AF_INET)) + dst.sin_addr.s_addr = + s->rt_addr.v4.s_addr; + ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; + PF_STATE_UNLOCK(s); + } + if (ifp == NULL) + goto bad; + + if (ip_set_fwdtag(m0, &dst, ifp)) + goto bad; + + if ((r->rt == PF_DUPTO || r->rt == PF_REPLYTO) && IP_HAS_NEXTHOP(m0)) { + ip_forward(m0, 1); + if (r->rt == PF_REPLYTO) + *m = NULL; + } + return; + +bad_locked: + if (s) + PF_STATE_UNLOCK(s); +bad: + m_freem(m0); + if (r->rt != PF_DUPTO) + *m = NULL; +} #endif /* INET */ #ifdef INET6 @@ -6106,6 +6221,105 @@ pf_route6(struct mbuf **m, struct pf_krule *r, int dir, struct ifnet *oifp, m_freem(m0); goto done; } + +static void +pf_route6_shared(struct mbuf **m, struct pf_krule *r, int dir, + struct ifnet *ifp, struct pf_kstate *s, struct pf_pdesc *pd, + struct inpcb *inp) +{ + struct mbuf *m0; + struct sockaddr_in6 dst; + struct ip6_hdr *ip6; + struct pf_addr naddr; + struct pf_ksrc_node *sn = NULL; + + KASSERT(m && *m && r && ifp, ("%s: invalid parameters", __func__)); + KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", + __func__)); + + if ((pd->pf_mtag == NULL && + ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || + pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad_locked; + } + + if (r->rt == PF_DUPTO) { + 
if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + m0 = *m; + } + + /* retain old behaviour by avoiding a rewrite */ + if (IP6_HAS_NEXTHOP(m0)) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + + ip6 = mtod(m0, struct ip6_hdr *); + + bzero(&dst, sizeof(dst)); + dst.sin6_family = AF_INET6; + dst.sin6_len = sizeof(dst); + dst.sin6_addr = ip6->ip6_dst; + + bzero(&naddr, sizeof(naddr)); + + if (TAILQ_EMPTY(&r->rpool.list)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); + goto bad_locked; + } + if (s == NULL) { + pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, + &naddr, NULL, &sn); + if (!PF_AZERO(&naddr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst.sin6_addr, + &naddr, AF_INET6); + ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; + } else { + if (!PF_AZERO(&s->rt_addr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst.sin6_addr, + &s->rt_addr, AF_INET6); + ifp = s->rt_kif ? 
s->rt_kif->pfik_ifp : NULL; + } + + if (s) + PF_STATE_UNLOCK(s); + + if (ifp == NULL) + goto bad; + + if (ip6_set_fwdtag(m0, &dst, ifp)) + goto bad; + + if ((r->rt == PF_DUPTO || r->rt == PF_REPLYTO) && IP6_HAS_NEXTHOP(m0)) { + ip6_forward(m0, 1); + if (r->rt == PF_REPLYTO) + *m = NULL; + } + + return; + +bad_locked: + if (s) + PF_STATE_UNLOCK(s); +bad: + m_freem(m0); + if (r->rt != PF_DUPTO) + *m = NULL; +} #endif /* INET6 */ /* @@ -6255,6 +6469,7 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb * struct pf_kruleset *ruleset = NULL; struct pf_pdesc pd; int off, dirndx, pqid = 0; + int share_forward = V_pf_share_forward; PF_RULES_RLOCK_TRACKER; KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir)); @@ -6263,6 +6478,10 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb * if (!V_pf_status.running) return (PF_PASS); + if (share_forward && dir == PF_OUT && IP_HAS_NEXTHOP(m)) { + ip_get_fwdtag(m, NULL, &ifp); + } + memset(&pd, 0, sizeof(pd)); kif = (struct pfi_kkif *)ifp->if_pf_kif; @@ -6676,7 +6895,12 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb * default: /* pf_route() returns unlocked. 
*/ if (r->rt) { - pf_route(m0, r, dir, kif->pfik_ifp, s, &pd, inp); + if (!share_forward) + pf_route(m0, r, dir, kif->pfik_ifp, s, + &pd, inp); + else + pf_route_shared(m0, r, dir, kif->pfik_ifp, s, + &pd, inp); return (action); } break; @@ -6705,6 +6929,7 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb struct pf_kruleset *ruleset = NULL; struct pf_pdesc pd; int off, terminal = 0, dirndx, rh_cnt = 0, pqid = 0; + int share_forward = V_pf_share_forward6; PF_RULES_RLOCK_TRACKER; KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir)); @@ -6713,6 +6938,10 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb if (!V_pf_status.running) return (PF_PASS); + if (share_forward && dir == PF_OUT && IP6_HAS_NEXTHOP(m)) { + ip6_get_fwdtag(m, NULL, &ifp); + } + memset(&pd, 0, sizeof(pd)); pd.pf_mtag = pf_find_mtag(m); @@ -7082,7 +7311,12 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb default: /* pf_route6() returns unlocked. */ if (r->rt) { - pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd, inp); + if (!share_forward) + pf_route6(m0, r, dir, kif->pfik_ifp, s, + &pd, inp); + else + pf_route6_shared(m0, r, dir, kif->pfik_ifp, s, + &pd, inp); return (action); } break;