Skip to content

Commit

Permalink
Do not remove kernel-ll addresses from bridges
Browse files Browse the repository at this point in the history
Make the behaviour enabled by env var DOCKER_BRIDGE_PRESERVE_KERNEL_LL
the default...
- don't remove kernel assigned link-local addresses
  - or any address in fe80::/64
- don't assign fe80::1 to a bridge

Signed-off-by: Rob Murray <rob.murray@docker.com>
  • Loading branch information
robmry committed May 1, 2024
1 parent 9d07820 commit f46473b
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 148 deletions.
62 changes: 28 additions & 34 deletions integration/networking/bridge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -531,41 +531,35 @@ func TestDefaultBridgeAddresses(t *testing.T) {
},
}

for _, preserveKernelLL := range []bool{false, true} {
var dopts []daemon.Option
if preserveKernelLL {
dopts = append(dopts, daemon.WithEnvVars("DOCKER_BRIDGE_PRESERVE_KERNEL_LL=1"))
}
d := daemon.New(t, dopts...)
c := d.NewClientT(t)

for _, tc := range testcases {
for _, step := range tc.steps {
tcName := fmt.Sprintf("kernel_ll_%v/%s/%s", preserveKernelLL, tc.name, step.stepName)
t.Run(tcName, func(t *testing.T) {
ctx := testutil.StartSpan(ctx, t)
// Check that the daemon starts - regression test for:
// https://github.com/moby/moby/issues/46829
d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)

// Start a container, so that the bridge is set "up" and gets a kernel_ll address.
cID := container.Run(ctx, t, c)
defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true})

d.Stop(t)

// Check that the expected addresses have been applied to the bridge. (Skip in
// rootless mode, because the bridge is in a different network namespace.)
if !testEnv.IsRootless() {
res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
assert.Equal(t, res.ExitCode, 0, step.stepName)
stdout := res.Stdout()
for _, expAddr := range step.expAddrs {
assert.Check(t, is.Contains(stdout, expAddr))
}
d := daemon.New(t)
defer d.Stop(t)
c := d.NewClientT(t)

for _, tc := range testcases {
for _, step := range tc.steps {
t.Run(tc.name+"/"+step.stepName, func(t *testing.T) {
ctx := testutil.StartSpan(ctx, t)
// Check that the daemon starts - regression test for:
// https://github.com/moby/moby/issues/46829
d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)

// Start a container, so that the bridge is set "up" and gets a kernel_ll address.
cID := container.Run(ctx, t, c)
defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true})

d.Stop(t)

// Check that the expected addresses have been applied to the bridge. (Skip in
// rootless mode, because the bridge is in a different network namespace.)
if !testEnv.IsRootless() {
res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
assert.Equal(t, res.ExitCode, 0, step.stepName)
stdout := res.Stdout()
for _, expAddr := range step.expAddrs {
assert.Check(t, is.Contains(stdout, expAddr))
}
})
}
}
})
}
}
}
Expand Down
99 changes: 37 additions & 62 deletions libnetwork/drivers/bridge/interface_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"
"net"
"net/netip"
"os"

"github.com/containerd/log"
"github.com/docker/docker/errdefs"
Expand Down Expand Up @@ -71,90 +70,66 @@ func (i *bridgeInterface) addresses(family int) ([]netlink.Addr, error) {
return addrs, nil
}

func getRequiredIPv6Addrs(config *networkConfiguration) (requiredAddrs map[netip.Addr]netip.Prefix, err error) {
requiredAddrs = make(map[netip.Addr]netip.Prefix)

if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") != "1" {
// Always give the bridge 'fe80::1' - every interface is required to have an
// address in 'fe80::/64'. Linux may assign an address, but we'll replace it with
// 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool
// assigned address will not be a second address in the LL subnet.
ra, ok := netiputil.ToPrefix(bridgeIPv6)
if !ok {
err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix")
return nil, err
}
requiredAddrs[ra.Addr()] = ra
}
func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) error {
// Remember the configured addresses.
i.bridgeIPv6 = config.AddressIPv6
i.gatewayIPv6 = config.AddressIPv6.IP

ra, ok := netiputil.ToPrefix(config.AddressIPv6)
addrPrefix, ok := netiputil.ToPrefix(config.AddressIPv6)
if !ok {
err = fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix", config.AddressIPv6.String())
return nil, err
return errdefs.System(
fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix",
config.AddressIPv6.String()))
}
requiredAddrs[ra.Addr()] = ra

return requiredAddrs, nil
}

func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) error {
// Get the IPv6 addresses currently assigned to the bridge, if any.
existingAddrs, err := i.addresses(netlink.FAMILY_V6)
if err != nil {
return errdefs.System(err)
}

// Get the list of required IPv6 addresses for this bridge.
var requiredAddrs map[netip.Addr]netip.Prefix
requiredAddrs, err = getRequiredIPv6Addrs(config)
if err != nil {
return errdefs.System(err)
}
i.bridgeIPv6 = config.AddressIPv6
i.gatewayIPv6 = config.AddressIPv6.IP

// Remove addresses that aren't required.
for _, existingAddr := range existingAddrs {
ea, ok := netip.AddrFromSlice(existingAddr.IP)
if !ok {
return errdefs.System(fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
return errdefs.System(
fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
}
// Optionally, avoid deleting the kernel-assigned link local address.
// (Don't delete fe80::1 either - if it was previously assigned to the bridge, and the
// kernel_ll address was deleted, the bridge won't get a new kernel_ll address.)
if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") == "1" {
if p, _ := ea.Prefix(64); p == linkLocalPrefix {
continue
}
// Don't delete the kernel-assigned link local address (or fe80::1 - if it was
// assigned to the bridge by an older version of the daemon that deleted the
// kernel_ll address, the bridge won't get a new kernel_ll address.) But, do
// delete unexpected link-local addresses (fe80::/10) that aren't in fe80::/64,
// those have been IPAM-assigned.
if p, _ := ea.Prefix(64); p == linkLocalPrefix {
continue
}
// Ignore the prefix length when comparing addresses, it's informational
// (RFC-5942 section 4), and removing/re-adding an address that's still valid
// would disrupt traffic on live-restore.
if _, required := requiredAddrs[ea]; !required {
if ea != addrPrefix.Addr() {
err := i.nlh.AddrDel(i.Link, &existingAddr) //#nosec G601 -- Memory aliasing is not an issue in practice as the &existingAddr pointer is not retained by the callee after the AddrDel() call returns.
if err != nil {
log.G(context.TODO()).WithFields(log.Fields{"error": err, "address": existingAddr.IPNet}).Warnf("Failed to remove residual IPv6 address from bridge")
log.G(context.TODO()).WithFields(log.Fields{
"error": err,
"address": existingAddr.IPNet},
).Warnf("Failed to remove residual IPv6 address from bridge")
}
}
}
// Add or update required addresses.
for _, addrPrefix := range requiredAddrs {
// Using AddrReplace(), rather than AddrAdd(). When the subnet is changed for an
// existing bridge in a way that doesn't affect the bridge's assigned address,
// the old address has not been removed at this point - because that would be
// service-affecting for a running container.
//
// But if, for example, 'fixed-cidr-v6' is changed from '2000:dbe::/64' to
// '2000:dbe::/80', the default bridge will still be assigned address
// '2000:dbe::1'. In the output of 'ip a', the prefix length is displayed - and
// the user is likely to expect to see it updated from '64' to '80'.
// Unfortunately, 'netlink.AddrReplace()' ('RTM_NEWADDR' with 'NLM_F_REPLACE')
// doesn't update the prefix length. This is a cosmetic problem, the prefix
// length of an assigned address is not used to determine whether an address is
// "on-link" (RFC-5942).
if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil {
return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %v", i.bridgeIPv6, err))
}
// Using AddrReplace(), rather than AddrAdd(). When the subnet is changed for an
// existing bridge in a way that doesn't affect the bridge's assigned address,
// the old address has not been removed at this point - because that would be
// service-affecting for a running container.
//
// But if, for example, 'fixed-cidr-v6' is changed from '2000:dbe::/64' to
// '2000:dbe::/80', the default bridge will still be assigned address
// '2000:dbe::1'. In the output of 'ip a', the prefix length is displayed - and
// the user is likely to expect to see it updated from '64' to '80'.
// Unfortunately, 'netlink.AddrReplace()' ('RTM_NEWADDR' with 'NLM_F_REPLACE')
// doesn't update the prefix length. This is a cosmetic problem, the prefix
// length of an assigned address is not used to determine whether an address is
// "on-link" (RFC-5942).
if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil {
return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %v", i.bridgeIPv6, err))
}
return nil
}
60 changes: 11 additions & 49 deletions libnetwork/drivers/bridge/interface_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@ package bridge

import (
"net"
"net/netip"
"strings"
"testing"

"github.com/docker/docker/internal/testutils/netnsutils"
"github.com/google/go-cmp/cmp"
"github.com/vishvananda/netlink"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
Expand Down Expand Up @@ -96,48 +93,6 @@ func TestAddressesNonEmptyInterface(t *testing.T) {
assert.Equal(t, addrs[0].IPNet.String(), expAddrV6)
}

func TestGetRequiredIPv6Addrs(t *testing.T) {
testcases := []struct {
name string
addressIPv6 string
expReqdAddrs []string
}{
{
name: "Regular address, expect default link local",
addressIPv6: "2000:3000::1/80",
expReqdAddrs: []string{"fe80::1/64", "2000:3000::1/80"},
},
{
name: "Standard link local address only",
addressIPv6: "fe80::1/64",
expReqdAddrs: []string{"fe80::1/64"},
},
{
name: "Nonstandard link local address",
addressIPv6: "fe80:abcd::1/42",
expReqdAddrs: []string{"fe80:abcd::1/42", "fe80::1/64"},
},
}

for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
config := &networkConfiguration{
AddressIPv6: cidrToIPNet(t, tc.addressIPv6),
}

expResult := map[netip.Addr]netip.Prefix{}
for _, addr := range tc.expReqdAddrs {
expResult[netip.MustParseAddr(strings.Split(addr, "/")[0])] = netip.MustParsePrefix(addr)
}

reqd, err := getRequiredIPv6Addrs(config)
assert.Check(t, is.Nil(err))
assert.Check(t, is.DeepEqual(reqd, expResult,
cmp.Comparer(func(a, b netip.Prefix) bool { return a == b })))
})
}
}

func TestProgramIPv6Addresses(t *testing.T) {
defer netnsutils.SetupTestOSContext(t)()

Expand All @@ -159,19 +114,19 @@ func TestProgramIPv6Addresses(t *testing.T) {
i := prepTestBridge(t, nc)

// The bridge has no addresses, ask for a regular IPv6 network and expect it to
// be added to the bridge, with the default link local address.
// be added to the bridge.
nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/64")
err := i.programIPv6Addresses(nc)
assert.NilError(t, err)
checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
checkAddrs(i, nc, []string{"2000:3000::1/64"})

// Shrink the subnet of that regular address, the prefix length of the address
// will not be modified - but it's informational-only, the address itself has
// not changed.
nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/80")
err = i.programIPv6Addresses(nc)
assert.NilError(t, err)
checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
checkAddrs(i, nc, []string{"2000:3000::1/64"})

// Ask for link-local only, by specifying an address with the Link Local prefix.
// The regular address should be removed.
Expand All @@ -180,9 +135,16 @@ func TestProgramIPv6Addresses(t *testing.T) {
assert.NilError(t, err)
checkAddrs(i, nc, []string{"fe80::1/64"})

// Swap the standard link local address for a nonstandard one.
// Swap the standard link local address for a nonstandard one. The standard LL
// address will not be removed.
nc.AddressIPv6 = cidrToIPNet(t, "fe80:5555::1/55")
err = i.programIPv6Addresses(nc)
assert.NilError(t, err)
checkAddrs(i, nc, []string{"fe80:5555::1/55", "fe80::1/64"})

// Back to the original address, expect the nonstandard LL address to be replaced.
nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/64")
err = i.programIPv6Addresses(nc)
assert.NilError(t, err)
checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
}
2 changes: 1 addition & 1 deletion libnetwork/drivers/bridge/network_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func TestLinkCreate(t *testing.T) {

ip6 := te.iface.addrv6.IP
if !n.bridge.bridgeIPv6.Contains(ip6) {
t.Fatalf("IP %s is not a valid ip in the subnet %s", ip6.String(), bridgeIPv6.String())
t.Fatalf("IP %s is not a valid ip in the subnet %s", ip6.String(), n.bridge.bridgeIPv6.String())
}

if !te.gw.Equal(n.bridge.bridgeIPv4.IP) {
Expand Down
4 changes: 2 additions & 2 deletions libnetwork/drivers/bridge/setup_ipv6_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ func TestSetupIPv6(t *testing.T) {

var found bool
for _, addr := range addrsv6 {
if bridgeIPv6.String() == addr.IPNet.String() {
if config.AddressIPv6.String() == addr.IPNet.String() {
found = true
break
}
}

if !found {
t.Fatalf("Bridge device does not have requested IPv6 address %v", bridgeIPv6)
t.Fatalf("Bridge device does not have requested IPv6 address %v", config.AddressIPv6)
}
}

Expand Down

0 comments on commit f46473b

Please sign in to comment.