Merge pull request #44742 from akerouanton/fix-44688
Clear conntrack entries for published UDP ports
neersighted committed Jan 5, 2023
2 parents fcb5245 + b37d343 commit e927539
Showing 3 changed files with 67 additions and 11 deletions.
17 changes: 7 additions & 10 deletions libnetwork/drivers/bridge/bridge.go
@@ -1328,10 +1328,9 @@ func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string
        }
    }()

    // Clean the connection tracker state of the host for the
    // specific endpoint. This is needed because some flows may be
    // bound to the local proxy and won't be redirected to the new endpoints.
    clearEndpointConnections(d.nlh, endpoint)
    // Clean the connection tracker state of the host for the specific endpoint. This is needed because some flows may
    // be bound to the local proxy, or to the host (for UDP packets), and won't be redirected to the new endpoints.
    clearConntrackEntries(d.nlh, endpoint)

    if err = d.storeUpdate(endpoint); err != nil {
        return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)
@@ -1366,12 +1365,10 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error {

    endpoint.portMapping = nil

    // Clean the connection tracker state of the host for the specific endpoint.
    // The host kernel keeps track of connections (TCP and UDP), so if a new endpoint gets the same IP as this one
    // (which is going down), it is possible that some packets would not be routed correctly inside the new endpoint.
    // Deeper details: https://github.com/docker/docker/issues/8795
    clearEndpointConnections(d.nlh, endpoint)
    // Clean the connection tracker state of the host for the specific endpoint. This is a precautionary measure to
    // prevent a new endpoint that reuses this IP address from receiving unexpected packets due to stale conntrack
    // state leading to bad NATing.
    clearConntrackEntries(d.nlh, endpoint)

    if err = d.storeUpdate(endpoint); err != nil {
        return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)
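For readers unfamiliar with the mechanism: both call sites above flush conntrack state through the netlink conntrack API that this diff also uses directly. A minimal standalone sketch of an IP-based flush, assuming the vishvananda/netlink package and root privileges; the handle setup and the 172.17.0.2 address are illustrative, not the driver's actual wiring:

package main

import (
    "log"
    "net"
    "syscall"

    "github.com/vishvananda/netlink"
)

func main() {
    // Open a netlink handle in the current network namespace.
    nlh, err := netlink.NewHandle()
    if err != nil {
        log.Fatalf("opening netlink handle: %v", err)
    }
    defer nlh.Delete()

    // Hypothetical endpoint address, standing in for ep.addr.IP.
    ip := net.ParseIP("172.17.0.2")

    // Match conntrack entries whose reply tuple involves the endpoint IP
    // (source or destination).
    filter := &netlink.ConntrackFilter{}
    if err := filter.AddIP(netlink.ConntrackReplyAnyIP, ip); err != nil {
        log.Fatalf("building conntrack filter: %v", err)
    }

    // Delete matching IPv4 flows; the number of purged flows is returned.
    purged, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET, filter)
    if err != nil {
        log.Fatalf("flushing conntrack entries: %v", err)
    }
    log.Printf("purged %d flows for %s", purged, ip)
}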
24 changes: 23 additions & 1 deletion libnetwork/drivers/bridge/setup_ip_tables.go
@@ -9,6 +9,7 @@ import (
"net"

"github.com/docker/docker/libnetwork/iptables"
"github.com/docker/docker/libnetwork/types"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
)
@@ -418,14 +419,35 @@ func setupInternalNetworkRules(bridgeIface string, addr *net.IPNet, icc, insert
    return setIcc(version, bridgeIface, icc, insert)
}

func clearEndpointConnections(nlh *netlink.Handle, ep *bridgeEndpoint) {
// clearConntrackEntries flushes conntrack entries matching the endpoint IP
// address or one of the exposed UDP ports.
// In the first case, entries may exist because packets were received by the
// host between userland proxy startup and iptables setup.
// In the latter case, entries may exist because packets were received while
// there was nowhere to route them, as netfilter creates an entry in that case.
// This is required because iptables NAT rules are evaluated by netfilter only
// when a new conntrack entry is created. When Docker later adds NAT rules,
// netfilter ignores them for any packet matching a pre-existing conntrack
// entry. As such, we need to flush those conntrack entries to make sure NAT
// rules are applied to all packets.
// See: #8795, #44688 & #44742.
func clearConntrackEntries(nlh *netlink.Handle, ep *bridgeEndpoint) {
    var ipv4List []net.IP
    var ipv6List []net.IP
    var udpPorts []uint16

    if ep.addr != nil {
        ipv4List = append(ipv4List, ep.addr.IP)
    }
    if ep.addrv6 != nil {
        ipv6List = append(ipv6List, ep.addrv6.IP)
    }
    for _, pb := range ep.portMapping {
        if pb.Proto == types.UDP {
            udpPorts = append(udpPorts, pb.HostPort)
        }
    }

    iptables.DeleteConntrackEntries(nlh, ipv4List, ipv6List)
    iptables.DeleteConntrackEntriesByPort(nlh, types.UDP, udpPorts)
}
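Why the UDP ports matter: as the doc comment above notes, netfilter creates a conntrack entry for the first packet of a UDP flow even when there is nowhere to route it, and that entry then bypasses any NAT rules Docker installs afterwards. A sketch for observing such entries from Go, assuming vishvananda/netlink and root privileges; port 8080 is an arbitrary example:

package main

import (
    "fmt"
    "log"
    "syscall"

    "github.com/vishvananda/netlink"
)

func main() {
    // List the IPv4 conntrack table of the current network namespace.
    flows, err := netlink.ConntrackTableList(netlink.ConntrackTable, syscall.AF_INET)
    if err != nil {
        log.Fatalf("listing conntrack table: %v", err)
    }

    // Report entries whose original destination port is the published UDP
    // port (17 is the IP protocol number for UDP).
    const publishedPort = 8080
    for _, f := range flows {
        if f.Forward.Protocol == 17 && f.Forward.DstPort == publishedPort {
            fmt.Printf("stale flow: %s:%d -> %s:%d\n",
                f.Forward.SrcIP, f.Forward.SrcPort,
                f.Forward.DstIP, f.Forward.DstPort)
        }
    }
}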
37 changes: 37 additions & 0 deletions libnetwork/iptables/conntrack.go
@@ -8,6 +8,7 @@ import (
"net"
"syscall"

"github.com/docker/docker/libnetwork/types"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
)
@@ -53,6 +54,42 @@ func DeleteConntrackEntries(nlh *netlink.Handle, ipv4List []net.IP, ipv6List []n
    return totalIPv4FlowPurged, totalIPv6FlowPurged, nil
}

// DeleteConntrackEntriesByPort flushes conntrack entries whose original
// destination port matches one of the given ports for the given protocol.
func DeleteConntrackEntriesByPort(nlh *netlink.Handle, proto types.Protocol, ports []uint16) error {
    if !IsConntrackProgrammable(nlh) {
        return ErrConntrackNotConfigurable
    }

    var totalIPv4FlowPurged uint
    var totalIPv6FlowPurged uint

    for _, port := range ports {
        filter := &netlink.ConntrackFilter{}
        if err := filter.AddProtocol(uint8(proto)); err != nil {
            logrus.Warnf("Failed to delete conntrack state for %s port %d: %v", proto.String(), port, err)
            continue
        }
        if err := filter.AddPort(netlink.ConntrackOrigDstPort, port); err != nil {
            logrus.Warnf("Failed to delete conntrack state for %s port %d: %v", proto.String(), port, err)
            continue
        }

        v4FlowPurged, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET, filter)
        if err != nil {
            logrus.Warnf("Failed to delete conntrack state for IPv4 %s port %d: %v", proto.String(), port, err)
        }
        totalIPv4FlowPurged += v4FlowPurged

        v6FlowPurged, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET6, filter)
        if err != nil {
            logrus.Warnf("Failed to delete conntrack state for IPv6 %s port %d: %v", proto.String(), port, err)
        }
        totalIPv6FlowPurged += v6FlowPurged
    }

    logrus.Debugf("DeleteConntrackEntriesByPort for %s ports purged ipv4:%d, ipv6:%d", proto.String(), totalIPv4FlowPurged, totalIPv6FlowPurged)
    return nil
}

func purgeConntrackState(nlh *netlink.Handle, family netlink.InetFamily, ipAddress net.IP) (uint, error) {
    filter := &netlink.ConntrackFilter{}
    // NOTE: doing the flush using the ipAddress is safe because today there cannot be multiple networks with the same subnet
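A hedged usage sketch of the new helper, assuming a caller that owns a netlink handle; the port list is illustrative (in the bridge driver the ports come from ep.portMapping entries whose Proto is types.UDP):

package main

import (
    "log"

    "github.com/docker/docker/libnetwork/iptables"
    "github.com/docker/docker/libnetwork/types"
    "github.com/vishvananda/netlink"
)

func main() {
    nlh, err := netlink.NewHandle()
    if err != nil {
        log.Fatalf("opening netlink handle: %v", err)
    }
    defer nlh.Delete()

    // Flush conntrack state for a set of hypothetical published UDP ports.
    ports := []uint16{53, 8080}
    if err := iptables.DeleteConntrackEntriesByPort(nlh, types.UDP, ports); err != nil {
        log.Fatalf("flushing conntrack entries by port: %v", err)
    }
}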
