Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

libnetwork/ipams/defaultipam: use ULA prefix by default #47853

Merged
merged 2 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@ package daemon // import "github.com/docker/docker/daemon"

import (
"context"
"crypto/sha256"
"encoding/binary"
"fmt"
"net"
"net/netip"
"os"
"path"
"path/filepath"
"runtime"
"slices"
"sync"
"sync/atomic"
"time"
Expand Down Expand Up @@ -60,6 +64,8 @@ import (
"github.com/docker/docker/libnetwork"
"github.com/docker/docker/libnetwork/cluster"
nwconfig "github.com/docker/docker/libnetwork/config"
"github.com/docker/docker/libnetwork/ipamutils"
"github.com/docker/docker/libnetwork/ipbits"
"github.com/docker/docker/pkg/authorization"
"github.com/docker/docker/pkg/fileutils"
"github.com/docker/docker/pkg/idtools"
Expand Down Expand Up @@ -1461,7 +1467,7 @@ func isBridgeNetworkDisabled(conf *config.Config) bool {
return conf.BridgeConfig.Iface == config.DisableNetworkBridge
}

func (daemon *Daemon) networkOptions(conf *config.Config, pg plugingetter.PluginGetter, activeSandboxes map[string]interface{}) ([]nwconfig.Option, error) {
func (daemon *Daemon) networkOptions(conf *config.Config, pg plugingetter.PluginGetter, hostID string, activeSandboxes map[string]interface{}) ([]nwconfig.Option, error) {
dd := runconfig.DefaultDaemonNetworkMode()

options := []nwconfig.Option{
Expand All @@ -1474,9 +1480,21 @@ func (daemon *Daemon) networkOptions(conf *config.Config, pg plugingetter.Plugin
driverOptions(conf),
}

defaultAddressPools := ipamutils.GetLocalScopeDefaultNetworks()
if len(conf.NetworkConfig.DefaultAddressPools.Value()) > 0 {
options = append(options, nwconfig.OptionDefaultAddressPoolConfig(conf.NetworkConfig.DefaultAddressPools.Value()))
defaultAddressPools = conf.NetworkConfig.DefaultAddressPools.Value()
}
// If the Engine admin doesn't configure default-address-pools or if they
// don't provide any IPv6 prefix, we derive a ULA prefix from the daemon's
// hostID and add it to the pools. This makes dynamic IPv6 subnet
// allocation possible out-of-the-box.
if !slices.ContainsFunc(defaultAddressPools, func(nw *ipamutils.NetworkToSplit) bool {
return nw.Base.Addr().Is6() && !nw.Base.Addr().Is4In6()
}) {
defaultAddressPools = append(defaultAddressPools, deriveULABaseNetwork(hostID))
}
options = append(options, nwconfig.OptionDefaultAddressPoolConfig(defaultAddressPools))

if conf.LiveRestoreEnabled && len(activeSandboxes) != 0 {
options = append(options, nwconfig.OptionActiveSandboxes(activeSandboxes))
}
Expand All @@ -1487,6 +1505,23 @@ func (daemon *Daemon) networkOptions(conf *config.Config, pg plugingetter.Plugin
return options, nil
}

// deriveULABaseNetwork computes a host-specific ULA /48 base prefix by
// hashing hostID and using 40 bits of the digest as the Global ID placed
// right after the ULA prefix (with the L bit set, ie. fd00::/8). The
// returned ipamutils.NetworkToSplit is stable over time as long as hostID
// doesn't change.
//
// This is loosely based on the algorithm described in https://datatracker.ietf.org/doc/html/rfc4193#section-3.2.2.
func deriveULABaseNetwork(hostID string) *ipamutils.NetworkToSplit {
	digest := sha256.Sum256([]byte(hostID))
	// Global ID: keep only the 40 least significant bits of the first
	// 8 digest bytes.
	globalID := binary.BigEndian.Uint64(digest[:]) & (1<<40 - 1)
	ulaAddr := ipbits.Add(netip.MustParseAddr("fd00::"), globalID, 80)

	return &ipamutils.NetworkToSplit{
		Base: netip.PrefixFrom(ulaAddr, 48),
		Size: 64,
	}
}

// GetCluster returns the daemon's cluster component.
func (daemon *Daemon) GetCluster() Cluster {
	return daemon.cluster
}
Expand Down
28 changes: 28 additions & 0 deletions daemon/daemon_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package daemon // import "github.com/docker/docker/daemon"

import (
"net/netip"
"os"
"path/filepath"
"runtime"
Expand Down Expand Up @@ -313,3 +314,30 @@ func TestFindNetworkErrorType(t *testing.T) {
t.Error("The FindNetwork method MUST always return an error that implements the NotFound interface and is ErrNoSuchNetwork")
}
}

// TestDeriveULABaseNetwork checks that the ULA prefix derived from a given
// hostID is stable over time (ie. the same hostID always yields the same
// /48 base prefix split into /64 subnets).
func TestDeriveULABaseNetwork(t *testing.T) {
	for _, tc := range []struct {
		name      string
		hostID    string
		expPrefix netip.Prefix
	}{
		{
			name:      "Empty hostID",
			expPrefix: netip.MustParsePrefix("fd42:98fc:1c14::/48"),
		},
		{
			name:      "499d4bc0-b0b3-416f-b1ee-cf6486315593",
			hostID:    "499d4bc0-b0b3-416f-b1ee-cf6486315593",
			expPrefix: netip.MustParsePrefix("fd62:fb69:18af::/48"),
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			nw := deriveULABaseNetwork(tc.hostID)
			assert.Equal(t, nw.Base, tc.expPrefix)
			assert.Equal(t, nw.Size, 64)
		})
	}
}
2 changes: 1 addition & 1 deletion daemon/daemon_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -835,7 +835,7 @@ func configureKernelSecuritySupport(config *config.Config, driverName string) er
// network settings. If there's active sandboxes, configuration changes will not
// take effect.
func (daemon *Daemon) initNetworkController(cfg *config.Config, activeSandboxes map[string]interface{}) error {
netOptions, err := daemon.networkOptions(cfg, daemon.PluginStore, activeSandboxes)
netOptions, err := daemon.networkOptions(cfg, daemon.PluginStore, daemon.id, activeSandboxes)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure we should depend on daemon.id here, or at least; what's the requirement for the ID here, and is it ok for this ID to change?

The daemon.id used to be a cryptographic key for signing (images schema v1), but is no longer used for that. Some systems used it for identifying daemons (and ISTR "Docker EE" / "UCP" used it for that purpose), so it was replaced by default with a generated UUID that's stored in a file in /var/lib/docker. HOWEVER the content of that file (and thus the "ID") is treated as an opaque value (it's returned in docker info, but not used elsewhere, although we may abuse it in some integration tests), but the file is allowed to be set by the user (and as such could be "anything").

There's currently no contract whatsoever for it to be bound to any lifecycle (it's stored in /var/lib/docker so in most situations it won't change unless /var/lib/docker is wiped, but it's not strictly defined to not change).

Note that swarm nodes do have a cryptographic ID, but that's part of the swarm cluster, and that one is not controllable by the user.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure we should depend on daemon.id here, or at least; what's the requirement for the ID here, and is it ok for this ID to change?

FWIW, my 2nd commit's message:

This change generates a ULA base network by deriving a ULA Global ID
from the Engine's Host ID and put that base network into
'default-address-pools'. This Host ID is stable over time (except if
users remove their '/var/lib/docker/engine-id') and thus the GID is
stable too.

This is loosely based on https://datatracker.ietf.org/doc/html/rfc4193#section-3.2.2.

The nature of what's passed to deriveULABaseNetwork doesn't matter, but it has to meet two requirements: 1. it should be fairly stable over time (eg. it's no big deal if it changes exceptionally); 2. and it should be generated from a random source that provides enough entropy to make it reasonably unique across nodes.

HOWEVER the content of that file (and thus the "ID") is treated as an opaque value

I get that it's opaque for most of the code, but I think it shouldn't be for the Daemon struct as it's what owns it ultimately. Said another way, I think it's totally reasonable for the daemon to expect it to have some level of randomness.

Since deriveULABaseNetwork doesn't care about its nature / source beyond what I described above ⬆️, it doesn't break the ID's opaqueness.

so it was replaced by default with a generated UUID

It's currently a UUIDv4. Hot new RFC9562 defines UUIDv4 in its Section 5.4 and points to its Section 6.9 "for guidelines on random data generation". That section says:

Implementations SHOULD utilize a cryptographically secure pseudorandom number generator (CSPRNG)

Looking at github.com/google/uuid, the UUID generator uses crypto/rand.Reader under the hood:

And crypto/rand.Reader is a CSPRNG.

but the file is allowed to be set by the user (and as such could be "anything").

Trusting kapa.ai, this file is described nowhere in the docs and AFAIK we always told users they shouldn't touch anything in /var/lib/docker and /var/run/docker. If users want to mess with this file, then fine -- garbage in, garbage out.

There's currently no contract whatsoever for it to be bound to any lifecycle (it's stored in /var/lib/docker so in most situations it won't change unless /var/lib/docker is wiped, but it's not strictly defined to not change).

Since it's an implementation detail, I fail to see what specification is needed here to clarify its lifecyle beyond the code itself. However, I could properly document expectations for this value by adding some GoDoc around it and its source.

I think everything is fine here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the default is a UUID now, but the engine-id can be manually set through the engine-id file;

echo "my-engine-id" > /var/lib/docker/engine-id
dockerd

docker info --format '{{.ID}}'
my-engine-id

I guess my thinking here is that we'll be using the ID for a new purpose, and if we want something unique, we should either consider producing a separate file for this, which could be a similar file, but specifically for networking; libnetwork already has a directory for its own state;

/ # ls /var/lib/docker/network/
files
/ # ls /var/lib/docker/network/files/
local-kv.db

Or, if the "a random source that provides enough entropy to make it reasonably unique across nodes." means it's relevant for swarm nodes, and only relevant for those (?) this could even be the actual swarm node's id;

docker swarm init
Swarm initialized: current node (kf26e4om953f4cnrhufgph746) is now a manager.

But of course that won't work if the node is not a swarm node 😅

Copy link
Member Author

@akerouanton akerouanton May 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the default is a UUID now, but the engine-id can be manually set through the engine-id file;

Sure it can, as much as one can open local-kv.db and do whatever they want there. But why would any user do that? Unless kapa.ai is lying to me, it's not something documented.

And while this provides enough entropy to comply with https://datatracker.ietf.org/doc/html/rfc4193#section-3.2.2, there's a really low probability that someone, somewhere will end up with a base ULA prefix conflicting with something on their network. Having the source host ID stored in a separate file provides an escape hatch for such situation. In such case, we could tell them to delete the engine-id file and restart their daemon (but not to write it themselves).

Or, if the "a random source that provides enough entropy to make it reasonably unique across nodes." means it's relevant for swarm nodes, and only relevant for those (?) this could even be the actual swarm node's id;

Nope, this change adds the ULA base network to the 'local' address space which isn't used by Swarm. This change only targets standalone Docker Engine.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But why would any user do that? Unless kapa.ai is lying to me, it's not something documented.

It's really hard to assume it's not used, at least I recall Docker EE depending on some of this, and may even have been setting custom identifiers; even to the extent that we added a config for this, e428c82, and later had to reverse a change that changed behavior; f695e98

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the commit message states, the revert was necessary in large part because the engine ID was changing across upgrades. That was solved once and for all in #43555 by migrating the ID. Aside from that, Classic Swarm depended on Engine IDs being unique across all nodes. Mirantis Container Runtime does not care about engine ID in the slightest; I checked all the patches. And I could find no evidence that Mirantis Kubernetes Engine (the product formerly known as Docker EE) cares about the engine ID, either. I have found mentions that Docker EE had migrated to using the Swarm Node ID values in place of the engine IDs way back in ~2017. There is limited evidence engine identifiers are being used these days, and no evidence that anyone is setting custom engine identifiers.

Keep in mind that no escape hatch is necessary if the generated ULA prefix is unsuitable for a given user for whatever reason. The generated prefix is merely the implicit default. The user always has the option to override the default by configuring the daemon to allocate subnets from any IPv6 address pool of their choosing. The default just has to be good enough for the common case. A good enough default ULA prefix is stable across daemon restarts, upgrades, backup and restore, and is reasonably likely to not collide with other daemons. The Engine ID has those properties, which makes it an ideal seed to derive the ULA prefix from.

What's the problem if a user does customize the engine ID? We are just using the engine ID as an opaque seed value. A valid and probabilistically unique ULA prefix will be derived from any unique seed. It is only a potential problem if the engine IDs are non-unique, when containers from multiple non-unique engines are attached to the same physical network using the macvlan or ipvlan drivers. If someone does require multiple non-unique custom engine IDs with IPv6 macvlan/ipvlan containers to coexist in the same physical LAN, they can configure an explicit IPv6 address pool on each node, explicitly configure the address range of the ipvlan/macvlan network, set the IP addresses of containers explicitly, or take advantage of any of the other configuration knobs to override the implicit default they made problematic through their own actions.

Deriving the generated ULA prefix from the engine-id has a distinct advantage over something specific to networking: there is a single source of truth for engine identity for users to re-roll when needed. See, for example, #13278. Users only need to delete /var/lib/docker/engine-id before taking a VM snapshot or capturing a disk image to produce a system image which retains all the configuration and state (volumes, networks, containers, image cache, etc.) but results in each cloned instance having a distinct identity. If engine identity was scattered across multiple state files (or worse, local-kv.db) some Sysprep-like tool would be necessary to reliably generalize an engine's state.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot to reel back to this one; we discussed this in the maintainers call, and discussed the potential risks; looks like this should not be problematic for scenarios we discussed, so no objections from me.

if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion daemon/daemon_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ func configureMaxThreads(config *config.Config) error {
}

func (daemon *Daemon) initNetworkController(daemonCfg *config.Config, activeSandboxes map[string]interface{}) error {
netOptions, err := daemon.networkOptions(daemonCfg, nil, nil)
netOptions, err := daemon.networkOptions(daemonCfg, nil, daemon.id, nil)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion daemon/reload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ func TestDaemonReloadNetworkDiagnosticPort(t *testing.T) {
},
}

netOptions, err := daemon.networkOptions(&config.Config{CommonConfig: config.CommonConfig{Root: t.TempDir()}}, nil, nil)
netOptions, err := daemon.networkOptions(&config.Config{CommonConfig: config.CommonConfig{Root: t.TempDir()}}, nil, "", nil)
if err != nil {
t.Fatal(err)
}
Expand Down
26 changes: 26 additions & 0 deletions integration/network/bridge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package network

import (
"context"
"net/netip"
"strings"
"testing"
"time"

"github.com/docker/docker/api/types"
networktypes "github.com/docker/docker/api/types/network"
"github.com/docker/docker/api/types/versions"
ctr "github.com/docker/docker/integration/internal/container"
Expand Down Expand Up @@ -43,3 +45,27 @@ func TestCreateWithMultiNetworks(t *testing.T) {
ifacesWithAddress := strings.Count(res.Stdout.String(), "\n")
assert.Equal(t, ifacesWithAddress, 3)
}

// TestCreateWithIPv6DefaultsToULAPrefix checks that a network created with
// IPv6 enabled but no explicit subnet is allocated a subnet from the ULA
// range (fd00::/8) — ie. from the default address pool the daemon derives
// from its host ID.
func TestCreateWithIPv6DefaultsToULAPrefix(t *testing.T) {
	// On Windows, network creation fails with this error message: Error response from daemon: this request is not supported by the 'windows' ipam driver
	skip.If(t, testEnv.DaemonInfo.OSType == "windows")

	ctx := setupTest(t)
	apiClient := testEnv.APIClient()

	const nwName = "testnetula"
	network.CreateNoError(ctx, t, apiClient, nwName, network.WithIPv6())
	defer network.RemoveNoError(ctx, t, apiClient, nwName)

	// Use the nwName constant instead of repeating the literal, so the
	// inspect call can't silently drift from the created network's name.
	nw, err := apiClient.NetworkInspect(ctx, nwName, types.NetworkInspectOptions{})
	assert.NilError(t, err)

	for _, ipam := range nw.IPAM.Config {
		ipr := netip.MustParsePrefix(ipam.Subnet)
		if netip.MustParsePrefix("fd00::/8").Overlaps(ipr) {
			return
		}
	}

	t.Fatalf("Network %s has no ULA prefix, expected one.", nwName)
}
4 changes: 0 additions & 4 deletions libnetwork/ipams/defaultipam/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ const (
// two optional address pools respectively containing the list of user-defined
// address pools for 'local' and 'global' address spaces.
func Register(ic ipamapi.Registerer, lAddrPools, gAddrPools []*ipamutils.NetworkToSplit) error {
if len(lAddrPools) == 0 {
lAddrPools = ipamutils.GetLocalScopeDefaultNetworks()
}

if len(gAddrPools) == 0 {
gAddrPools = ipamutils.GetGlobalScopeDefaultNetworks()
}
Expand Down
10 changes: 7 additions & 3 deletions libnetwork/libnetwork_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ import (
"time"

"github.com/docker/docker/internal/testutils/netnsutils"
"github.com/docker/docker/libnetwork/config"
"github.com/docker/docker/libnetwork/driverapi"
"github.com/docker/docker/libnetwork/ipams/defaultipam"
"github.com/docker/docker/libnetwork/ipamutils"
"github.com/docker/docker/libnetwork/netlabel"
"github.com/docker/docker/libnetwork/netutils"
"github.com/docker/docker/libnetwork/scope"
Expand Down Expand Up @@ -353,13 +355,14 @@ func TestSRVServiceQuery(t *testing.T) {

defer netnsutils.SetupTestOSContext(t)()

c, err := New(OptionBoltdbWithRandomDBFile(t))
c, err := New(OptionBoltdbWithRandomDBFile(t),
config.OptionDefaultAddressPoolConfig(ipamutils.GetLocalScopeDefaultNetworks()))
if err != nil {
t.Fatal(err)
}
defer c.Stop()

n, err := c.NewNetwork("bridge", "net1", "", nil)
n, err := c.NewNetwork("bridge", "net1", "")
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -451,7 +454,8 @@ func TestServiceVIPReuse(t *testing.T) {

defer netnsutils.SetupTestOSContext(t)()

c, err := New(OptionBoltdbWithRandomDBFile(t))
c, err := New(OptionBoltdbWithRandomDBFile(t),
config.OptionDefaultAddressPoolConfig(ipamutils.GetLocalScopeDefaultNetworks()))
if err != nil {
t.Fatal(err)
}
Expand Down
2 changes: 2 additions & 0 deletions libnetwork/libnetwork_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/docker/docker/libnetwork/driverapi"
"github.com/docker/docker/libnetwork/ipams/defaultipam"
"github.com/docker/docker/libnetwork/ipams/null"
"github.com/docker/docker/libnetwork/ipamutils"
"github.com/docker/docker/libnetwork/netlabel"
"github.com/docker/docker/libnetwork/options"
"github.com/docker/docker/libnetwork/osl"
Expand Down Expand Up @@ -58,6 +59,7 @@ func newController(t *testing.T) *libnetwork.Controller {
"EnableIPForwarding": true,
},
}),
config.OptionDefaultAddressPoolConfig(ipamutils.GetLocalScopeDefaultNetworks()),
)
if err != nil {
t.Fatal(err)
Expand Down
8 changes: 6 additions & 2 deletions libnetwork/resolver_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ import (
"testing"

"github.com/docker/docker/internal/testutils/netnsutils"
"github.com/docker/docker/libnetwork/config"
"github.com/docker/docker/libnetwork/ipamutils"
"github.com/miekg/dns"
)

// test only works on linux
func TestDNSIPQuery(t *testing.T) {
defer netnsutils.SetupTestOSContext(t)()
c, err := New(OptionBoltdbWithRandomDBFile(t))
c, err := New(OptionBoltdbWithRandomDBFile(t),
config.OptionDefaultAddressPoolConfig(ipamutils.GetLocalScopeDefaultNetworks()))
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -110,7 +113,8 @@ func TestDNSProxyServFail(t *testing.T) {
osctx := netnsutils.SetupTestOSContextEx(t)
defer osctx.Cleanup(t)

c, err := New(OptionBoltdbWithRandomDBFile(t))
c, err := New(OptionBoltdbWithRandomDBFile(t),
config.OptionDefaultAddressPoolConfig(ipamutils.GetLocalScopeDefaultNetworks()))
if err != nil {
t.Fatal(err)
}
Expand Down
2 changes: 2 additions & 0 deletions libnetwork/sandbox_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/docker/docker/internal/testutils/netnsutils"
"github.com/docker/docker/libnetwork/config"
"github.com/docker/docker/libnetwork/ipams/defaultipam"
"github.com/docker/docker/libnetwork/ipamutils"
"github.com/docker/docker/libnetwork/netlabel"
"github.com/docker/docker/libnetwork/options"
"github.com/docker/docker/libnetwork/osl"
Expand All @@ -24,6 +25,7 @@ func getTestEnv(t *testing.T, opts ...[]NetworkOption) (*Controller, []*Network)
config.OptionDriverConfig(netType, map[string]any{
netlabel.GenericData: options.Generic{"EnableIPForwarding": true},
}),
config.OptionDefaultAddressPoolConfig(ipamutils.GetLocalScopeDefaultNetworks()),
)
if err != nil {
t.Fatal(err)
Expand Down
5 changes: 4 additions & 1 deletion libnetwork/service_common_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@ import (
"testing"

"github.com/docker/docker/internal/testutils/netnsutils"
"github.com/docker/docker/libnetwork/config"
"github.com/docker/docker/libnetwork/ipamutils"
"gotest.tools/v3/assert"
)

func TestCleanupServiceDiscovery(t *testing.T) {
defer netnsutils.SetupTestOSContext(t)()
c, err := New(OptionBoltdbWithRandomDBFile(t))
c, err := New(OptionBoltdbWithRandomDBFile(t),
config.OptionDefaultAddressPoolConfig(ipamutils.GetLocalScopeDefaultNetworks()))
assert.NilError(t, err)
defer c.Stop()

Expand Down
Loading