Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "kubelet/network: report but tolerate errors returned from GetNetNS()" #46739

Merged
merged 1 commit into from Jun 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
63 changes: 12 additions & 51 deletions pkg/kubelet/dockershim/docker_sandbox.go
Expand Up @@ -48,26 +48,6 @@ const (
runtimeName = "docker"
)

// Returns whether the sandbox network is ready, and whether the sandbox is known
func (ds *dockerService) getNetworkReady(podSandboxID string) (bool, bool) {
ds.networkReadyLock.Lock()
defer ds.networkReadyLock.Unlock()
ready, ok := ds.networkReady[podSandboxID]
return ready, ok
}

func (ds *dockerService) setNetworkReady(podSandboxID string, ready bool) {
ds.networkReadyLock.Lock()
defer ds.networkReadyLock.Unlock()
ds.networkReady[podSandboxID] = ready
}

func (ds *dockerService) clearNetworkReady(podSandboxID string) {
ds.networkReadyLock.Lock()
defer ds.networkReadyLock.Unlock()
delete(ds.networkReady, podSandboxID)
}

// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
// For docker, PodSandbox is implemented by a container holding the network
Expand Down Expand Up @@ -102,8 +82,6 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
}

ds.setNetworkReady(createResp.ID, false)

// Step 3: Create Sandbox Checkpoint.
if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
return createResp.ID, err
Expand Down Expand Up @@ -136,7 +114,6 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str

// Do not invoke network plugins if in hostNetwork mode.
if nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions(); nsOptions != nil && nsOptions.HostNetwork {
ds.setNetworkReady(createResp.ID, true)
return createResp.ID, nil
}

Expand All @@ -149,15 +126,12 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
// recognized by the CNI standard yet.
cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
if err == nil {
ds.setNetworkReady(createResp.ID, true)
} else {
if err != nil {
// TODO(random-liu): Do we need to teardown network here?
if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
}
}

return createResp.ID, err
}

Expand Down Expand Up @@ -225,10 +199,7 @@ func (ds *dockerService) StopPodSandbox(podSandboxID string) error {
errList := []error{}
if needNetworkTearDown {
cID := kubecontainer.BuildContainerID(runtimeName, podSandboxID)
err := ds.network.TearDownPod(namespace, name, cID)
if err == nil {
ds.setNetworkReady(podSandboxID, false)
} else {
if err := ds.network.TearDownPod(namespace, name, cID); err != nil {
errList = append(errList, err)
}
}
Expand Down Expand Up @@ -270,8 +241,6 @@ func (ds *dockerService) RemovePodSandbox(podSandboxID string) error {
errs = append(errs, err)
}

ds.clearNetworkReady(podSandboxID)

// Remove the checkpoint of the sandbox.
if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil {
errs = append(errs, err)
Expand All @@ -289,6 +258,9 @@ func (ds *dockerService) getIPFromPlugin(sandbox *dockertypes.ContainerJSON) (st
cID := kubecontainer.BuildContainerID(runtimeName, sandbox.ID)
networkStatus, err := ds.network.GetPodNetworkStatus(metadata.Namespace, metadata.Name, cID)
if err != nil {
// This might be a sandbox that somehow ended up without a default
// interface (eth0). We can't distinguish this from a more serious
// error, so callers should probably treat it as non-fatal.
return "", err
}
if networkStatus == nil {
Expand All @@ -300,7 +272,7 @@ func (ds *dockerService) getIPFromPlugin(sandbox *dockertypes.ContainerJSON) (st
// getIP returns the ip given the output of `docker inspect` on a pod sandbox,
// first interrogating any registered plugins, then simply trusting the ip
// in the sandbox itself. We look for an ipv4 address before ipv6.
func (ds *dockerService) getIP(podSandboxID string, sandbox *dockertypes.ContainerJSON) (string, error) {
func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, error) {
if sandbox.NetworkSettings == nil {
return "", nil
}
Expand All @@ -309,18 +281,11 @@ func (ds *dockerService) getIP(podSandboxID string, sandbox *dockertypes.Contain
// reporting the IP.
return "", nil
}

// Don't bother getting IP if the pod is known and networking isn't ready
ready, ok := ds.getNetworkReady(podSandboxID)
if ok && !ready {
return "", nil
if IP, err := ds.getIPFromPlugin(sandbox); err != nil {
glog.Warningf("%v", err)
} else if IP != "" {
return IP, nil
}

ip, err := ds.getIPFromPlugin(sandbox)
if err == nil {
return ip, nil
}

// TODO: trusting the docker ip is not a great idea. However docker uses
// eth0 by default and so does CNI, so if we find a docker IP here, we
// conclude that the plugin must have failed setup, or forgotten its ip.
Expand All @@ -329,11 +294,7 @@ func (ds *dockerService) getIP(podSandboxID string, sandbox *dockertypes.Contain
if sandbox.NetworkSettings.IPAddress != "" {
return sandbox.NetworkSettings.IPAddress, nil
}
if sandbox.NetworkSettings.GlobalIPv6Address != "" {
return sandbox.NetworkSettings.GlobalIPv6Address, nil
}

return "", fmt.Errorf("failed to read pod IP from plugin/docker: %v", err)
return sandbox.NetworkSettings.GlobalIPv6Address, nil
}

// PodSandboxStatus returns the status of the PodSandbox.
Expand All @@ -356,7 +317,7 @@ func (ds *dockerService) PodSandboxStatus(podSandboxID string) (*runtimeapi.PodS
if r.State.Running {
state = runtimeapi.PodSandboxState_SANDBOX_READY
}
IP, err := ds.getIP(podSandboxID, r)
IP, err := ds.getIP(r)
if err != nil {
return nil, err
}
Expand Down
45 changes: 0 additions & 45 deletions pkg/kubelet/dockershim/docker_sandbox_test.go
Expand Up @@ -133,8 +133,6 @@ func TestSandboxStatus(t *testing.T) {
expected.State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
err = ds.StopPodSandbox(id)
assert.NoError(t, err)
// IP not valid after sandbox stop
expected.Network.Ip = ""
status, err = ds.PodSandboxStatus(id)
assert.Equal(t, expected, status)

Expand All @@ -145,49 +143,6 @@ func TestSandboxStatus(t *testing.T) {
assert.Error(t, err, fmt.Sprintf("status of sandbox: %+v", status))
}

// TestSandboxStatusAfterRestart tests that retrieving sandbox status returns
// an IP address even if RunPodSandbox() was not yet called for this pod, as
// would happen on kubelet restart
func TestSandboxStatusAfterRestart(t *testing.T) {
ds, _, fClock := newTestDockerService()
config := makeSandboxConfig("foo", "bar", "1", 0)

// TODO: The following variables depend on the internal
// implementation of FakeDockerClient, and should be fixed.
fakeIP := "2.3.4.5"

state := runtimeapi.PodSandboxState_SANDBOX_READY
ct := int64(0)
hostNetwork := false
expected := &runtimeapi.PodSandboxStatus{
State: state,
CreatedAt: ct,
Metadata: config.Metadata,
Network: &runtimeapi.PodSandboxNetworkStatus{Ip: fakeIP},
Linux: &runtimeapi.LinuxPodSandboxStatus{Namespaces: &runtimeapi.Namespace{Options: &runtimeapi.NamespaceOption{HostNetwork: hostNetwork}}},
Labels: map[string]string{},
Annotations: map[string]string{},
}

// Create the sandbox.
fClock.SetTime(time.Now())
expected.CreatedAt = fClock.Now().UnixNano()

createConfig, err := ds.makeSandboxDockerConfig(config, defaultSandboxImage)
assert.NoError(t, err)

createResp, err := ds.client.CreateContainer(*createConfig)
assert.NoError(t, err)
err = ds.client.StartContainer(createResp.ID)
assert.NoError(t, err)

// Check status without RunPodSandbox() having set up networking
expected.Id = createResp.ID // ID is only known after the creation.
status, err := ds.PodSandboxStatus(createResp.ID)
assert.NoError(t, err)
assert.Equal(t, expected, status)
}

// TestNetworkPluginInvocation checks that the right SetUpPod and TearDownPod
// calls are made when we run/stop a sandbox.
func TestNetworkPluginInvocation(t *testing.T) {
Expand Down
13 changes: 3 additions & 10 deletions pkg/kubelet/dockershim/docker_service.go
Expand Up @@ -21,7 +21,6 @@ import (
"io"
"net/http"
"strconv"
"sync"
"time"

"github.com/blang/semver"
Expand Down Expand Up @@ -176,7 +175,6 @@ func NewDockerService(client libdocker.Interface, seccompProfileRoot string, pod
containerManager: cm.NewContainerManager(cgroupsName, client),
checkpointHandler: checkpointHandler,
disableSharedPID: disableSharedPID,
networkReady: make(map[string]bool),
}

// check docker version compatibility.
Expand Down Expand Up @@ -250,13 +248,8 @@ type dockerService struct {
podSandboxImage string
streamingRuntime *streamingRuntime
streamingServer streaming.Server

network *network.PluginManager
// Map of podSandboxID :: network-is-ready
networkReady map[string]bool
networkReadyLock sync.Mutex

containerManager cm.ContainerManager
network *network.PluginManager
containerManager cm.ContainerManager
// cgroup driver used by Docker runtime.
cgroupDriver string
checkpointHandler CheckpointHandler
Expand Down Expand Up @@ -322,7 +315,7 @@ func (ds *dockerService) GetNetNS(podSandboxID string) (string, error) {
if err != nil {
return "", err
}
return getNetworkNamespace(r)
return getNetworkNamespace(r), nil
}

// GetPodPortMappings returns the port mappings of the given podSandbox ID.
Expand Down
10 changes: 2 additions & 8 deletions pkg/kubelet/dockershim/docker_service_test.go
Expand Up @@ -46,14 +46,8 @@ func newTestDockerService() (*dockerService, *libdocker.FakeDockerClient, *clock
fakeClock := clock.NewFakeClock(time.Time{})
c := libdocker.NewFakeDockerClient().WithClock(fakeClock).WithVersion("1.11.2", "1.23")
pm := network.NewPluginManager(&network.NoopNetworkPlugin{})
return &dockerService{
client: c,
os: &containertest.FakeOS{},
network: pm,
legacyCleanup: legacyCleanupFlag{done: 1},
checkpointHandler: NewTestPersistentCheckpointHandler(),
networkReady: make(map[string]bool),
}, c, fakeClock
return &dockerService{client: c, os: &containertest.FakeOS{}, network: pm,
legacyCleanup: legacyCleanupFlag{done: 1}, checkpointHandler: NewTestPersistentCheckpointHandler()}, c, fakeClock
}

func newTestDockerServiceWithVersionCache() (*dockerService, *libdocker.FakeDockerClient, *clock.FakeClock) {
Expand Down
10 changes: 6 additions & 4 deletions pkg/kubelet/dockershim/helpers.go
Expand Up @@ -266,12 +266,14 @@ func getApparmorSecurityOpts(sc *runtimeapi.LinuxContainerSecurityContext, separ
return fmtOpts, nil
}

func getNetworkNamespace(c *dockertypes.ContainerJSON) (string, error) {
func getNetworkNamespace(c *dockertypes.ContainerJSON) string {
if c.State.Pid == 0 {
// Docker reports pid 0 for an exited container.
return "", fmt.Errorf("Cannot find network namespace for the terminated container %q", c.ID)
// Docker reports pid 0 for an exited container. We can't use it to
// check the network namespace, so return an empty string instead.
glog.V(4).Infof("Cannot find network namespace for the terminated container %q", c.ID)
return ""
}
return fmt.Sprintf(dockerNetNSFmt, c.State.Pid), nil
return fmt.Sprintf(dockerNetNSFmt, c.State.Pid)
}

// dockerFilter wraps around dockerfilters.Args and provides methods to modify
Expand Down
4 changes: 1 addition & 3 deletions pkg/kubelet/network/cni/cni.go
Expand Up @@ -251,11 +251,9 @@ func (plugin *cniNetworkPlugin) TearDownPod(namespace string, name string, id ku
if err := plugin.checkInitialized(); err != nil {
return err
}

// Lack of namespace should not be fatal on teardown
netnsPath, err := plugin.host.GetNetNS(id.ID)
if err != nil {
glog.Warningf("CNI failed to retrieve network namespace path: %v", err)
return fmt.Errorf("CNI failed to retrieve network namespace path: %v", err)
}

return plugin.deleteFromNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath)
Expand Down
8 changes: 4 additions & 4 deletions pkg/kubelet/network/kubenet/kubenet_linux.go
Expand Up @@ -741,9 +741,9 @@ func podIsExited(p *kubecontainer.Pod) bool {
return true
}

func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubecontainer.ContainerID, needNetNs bool) (*libcni.RuntimeConf, error) {
func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubecontainer.ContainerID) (*libcni.RuntimeConf, error) {
netnsPath, err := plugin.host.GetNetNS(id.ID)
if needNetNs && err != nil {
if err != nil {
glog.Errorf("Kubenet failed to retrieve network namespace path: %v", err)
}

Expand All @@ -755,7 +755,7 @@ func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubeco
}

func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) (cnitypes.Result, error) {
rt, err := plugin.buildCNIRuntimeConf(ifName, id, true)
rt, err := plugin.buildCNIRuntimeConf(ifName, id)
if err != nil {
return nil, fmt.Errorf("Error building CNI config: %v", err)
}
Expand All @@ -769,7 +769,7 @@ func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.Network
}

func (plugin *kubenetNetworkPlugin) delContainerFromNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) error {
rt, err := plugin.buildCNIRuntimeConf(ifName, id, false)
rt, err := plugin.buildCNIRuntimeConf(ifName, id)
if err != nil {
return fmt.Errorf("Error building CNI config: %v", err)
}
Expand Down
2 changes: 0 additions & 2 deletions pkg/kubelet/network/plugins.go
Expand Up @@ -144,8 +144,6 @@ type Host interface {
// CNI plugin wrappers like kubenet.
type NamespaceGetter interface {
// GetNetNS returns network namespace information for the given containerID.
// Runtimes should *never* return an empty namespace and nil error for
// a container; if error is nil then the namespace string must be valid.
GetNetNS(containerID string) (string, error)
}

Expand Down