Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kubelet bootstrap: start hostNetwork pods before we have PodCIDR #35526

Merged
merged 3 commits into from
Nov 6, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 0 additions & 2 deletions cluster/aws/templates/configure-vm-aws.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ EOF
if [[ ! -z "${KUBELET_APISERVER:-}" ]] && [[ ! -z "${KUBELET_CERT:-}" ]] && [[ ! -z "${KUBELET_KEY:-}" ]]; then
cat <<EOF >>/etc/salt/minion.d/grains.conf
kubelet_api_servers: '${KUBELET_APISERVER}'
cbr-cidr: 10.123.45.0/29
EOF
else
# If the kubelet is running disconnected from a master, give it a fixed
Expand All @@ -110,7 +109,6 @@ salt-node-role() {
grains:
roles:
- kubernetes-pool
cbr-cidr: 10.123.45.0/29
cloud: aws
api_servers: '${API_SERVERS}'
EOF
Expand Down
2 changes: 0 additions & 2 deletions cluster/gce/configure-vm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,6 @@ EOF
if [[ ! -z "${KUBELET_APISERVER:-}" ]] && [[ ! -z "${KUBELET_CERT:-}" ]] && [[ ! -z "${KUBELET_KEY:-}" ]]; then
cat <<EOF >>/etc/salt/minion.d/grains.conf
kubelet_api_servers: '${KUBELET_APISERVER}'
cbr-cidr: 10.123.45.0/29
EOF
else
# If the kubelet is running disconnected from a master, give it a fixed
Expand All @@ -973,7 +972,6 @@ function salt-node-role() {
grains:
roles:
- kubernetes-pool
cbr-cidr: 10.123.45.0/29
cloud: gce
api_servers: '${KUBERNETES_MASTER_NAME}'
EOF
Expand Down
5 changes: 1 addition & 4 deletions cluster/gce/gci/configure-helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -484,11 +484,8 @@ function start-kubelet {
if [[ ! -z "${KUBELET_APISERVER:-}" && ! -z "${KUBELET_CERT:-}" && ! -z "${KUBELET_KEY:-}" ]]; then
flags+=" --api-servers=https://${KUBELET_APISERVER}"
flags+=" --register-schedulable=false"
# need at least a /29 pod cidr for now due to #32844
# TODO: determine if we still allow non-hostnetwork pods to run on master, clean up master pod setup
# WARNING: potential ip range collision with 10.123.45.0/29
flags+=" --pod-cidr=10.123.45.0/29"
else
# Standalone mode (not widely used?)
flags+=" --pod-cidr=${MASTER_IP_RANGE}"
fi
else # For nodes
Expand Down
2 changes: 1 addition & 1 deletion cluster/gce/trusty/configure-helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ assemble_kubelet_flags() {
if [ ! -z "${KUBELET_APISERVER:-}" ] && \
[ ! -z "${KUBELET_CERT:-}" ] && \
[ ! -z "${KUBELET_KEY:-}" ]; then
KUBELET_CMD_FLAGS="${KUBELET_CMD_FLAGS} --api-servers=https://${KUBELET_APISERVER} --register-schedulable=false --pod-cidr=10.123.45.0/29"
KUBELET_CMD_FLAGS="${KUBELET_CMD_FLAGS} --api-servers=https://${KUBELET_APISERVER} --register-schedulable=false"
else
KUBELET_CMD_FLAGS="${KUBELET_CMD_FLAGS} --pod-cidr=${MASTER_IP_RANGE}"
fi
Expand Down
7 changes: 6 additions & 1 deletion pkg/kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -1393,6 +1393,11 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
return errOuter
}

// If the network plugin is not ready, only start the pod if it uses the host network
if rs := kl.runtimeState.networkErrors(); len(rs) != 0 && !podUsesHostNetwork(pod) {
return fmt.Errorf("network is not ready: %v", rs)
}

// Create Cgroups for the pod and apply resource parameters
// to them if cgroup-per-qos flag is enabled.
pcm := kl.containerManager.NewPodContainerManager()
Expand Down Expand Up @@ -1644,7 +1649,7 @@ func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHand
defer housekeepingTicker.Stop()
plegCh := kl.pleg.Watch()
for {
if rs := kl.runtimeState.errors(); len(rs) != 0 {
if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
glog.Infof("skipping pod synchronization - %v", rs)
time.Sleep(5 * time.Second)
continue
Expand Down
3 changes: 2 additions & 1 deletion pkg/kubelet/kubelet_node_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,8 @@ func (kl *Kubelet) setNodeReadyCondition(node *api.Node) {
// ref: https://github.com/kubernetes/kubernetes/issues/16961
currentTime := unversioned.NewTime(kl.clock.Now())
var newNodeReadyCondition api.NodeCondition
if rs := kl.runtimeState.errors(); len(rs) == 0 {
rs := append(kl.runtimeState.runtimeErrors(), kl.runtimeState.networkErrors()...)
if len(rs) == 0 {
newNodeReadyCondition = api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionTrue,
Expand Down
42 changes: 42 additions & 0 deletions pkg/kubelet/kubelet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1054,6 +1054,48 @@ func TestPrivilegedContainerDisallowed(t *testing.T) {
assert.Error(t, err, "expected pod infra creation to fail")
}

// TestNetworkErrorsWithoutHostNetwork verifies how syncPod treats a pod while
// the kubelet's runtime state carries a network error: the sync is expected
// to fail when the pod has HostNetwork=false and to succeed once the same pod
// is switched to HostNetwork=true.
func TestNetworkErrorsWithoutHostNetwork(t *testing.T) {
	tk := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
	tk.fakeCadvisor.On("VersionInfo").Return(&cadvisorapi.VersionInfo{}, nil)
	tk.fakeCadvisor.On("MachineInfo").Return(&cadvisorapi.MachineInfo{}, nil)
	tk.fakeCadvisor.On("ImagesFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
	tk.fakeCadvisor.On("RootFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
	kl := tk.kubelet

	// Record a network error in the runtime state before any sync happens.
	kl.runtimeState.setNetworkState(fmt.Errorf("simulated network error"))

	// List the apiserver and file sources as host-network sources.
	hostNetworkSources := []string{kubetypes.ApiserverSource, kubetypes.FileSource}
	capabilities.SetForTests(capabilities.Capabilities{
		PrivilegedSources: capabilities.PrivilegedSources{
			HostNetworkSources: hostNetworkSources,
		},
	})

	// The pod starts out with host networking disabled.
	spec := api.PodSpec{
		SecurityContext: &api.PodSecurityContext{
			HostNetwork: false,
		},
		Containers: []api.Container{
			{Name: "foo"},
		},
	}
	pod := podWithUidNameNsSpec("12345678", "hostnetwork", "new", spec)
	kl.podManager.SetPods([]*api.Pod{pod})

	// syncOnce runs a single SyncPodUpdate for pod against an empty pod status.
	syncOnce := func() error {
		return kl.syncPod(syncPodOptions{
			pod:        pod,
			podStatus:  &kubecontainer.PodStatus{},
			updateType: kubetypes.SyncPodUpdate,
		})
	}

	assert.Error(t, syncOnce(), "expected pod with hostNetwork=false to fail when network in error")

	// Tag the pod with the file config source and turn host networking on,
	// then sync again under the same simulated network error.
	pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = kubetypes.FileSource
	pod.Spec.SecurityContext.HostNetwork = true
	assert.NoError(t, syncOnce(), "expected pod with hostNetwork=true to succeed when network in error")
}

func TestFilterOutTerminatedPods(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
kubelet := testKubelet.kubelet
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/runonce_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ func TestRunOnce(t *testing.T) {
kubeClient: &fake.Clientset{},
hostname: testKubeletHostname,
nodeName: testKubeletHostname,
runtimeState: newRuntimeState(time.Second),
}
kb.containerManager = cm.NewStubContainerManager()

Expand Down
15 changes: 11 additions & 4 deletions pkg/kubelet/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,13 @@ func (s *runtimeState) setInitError(err error) {
s.initError = err
}

func (s *runtimeState) errors() []string {
func (s *runtimeState) runtimeErrors() []string {
s.RLock()
defer s.RUnlock()
var ret []string
if s.initError != nil {
ret = append(ret, s.initError.Error())
}
if s.networkError != nil {
ret = append(ret, s.networkError.Error())
}
if !s.lastBaseRuntimeSync.Add(s.baseRuntimeSyncThreshold).After(time.Now()) {
ret = append(ret, "container runtime is down")
}
Expand All @@ -87,6 +84,16 @@ func (s *runtimeState) errors() []string {
return ret
}

// networkErrors returns the message of the stored network error as a
// one-element slice, or a nil slice when no network error is set.
// The receiver's RLock is held while networkError is read.
func (s *runtimeState) networkErrors() []string {
	s.RLock()
	defer s.RUnlock()

	// Nothing recorded: hand back a nil slice, which callers can still
	// append to or take the length of.
	if s.networkError == nil {
		return nil
	}
	return []string{s.networkError.Error()}
}

func newRuntimeState(
runtimeSyncThreshold time.Duration,
) *runtimeState {
Expand Down