
Automated cherry pick of #32363 #32359 #32239 #31828 #32351 #32448

2 changes: 1 addition & 1 deletion build/build-image/cross/Dockerfile
@@ -72,7 +72,7 @@ RUN mkdir $TMPDIR \
github.com/jteeuwen/go-bindata/go-bindata

# Download and symlink etcd. We need this for our integration tests.
RUN export ETCD_VERSION=v3.0.4; \
RUN export ETCD_VERSION=v2.3.7; \
mkdir -p /usr/local/src/etcd \
&& cd /usr/local/src/etcd \
&& curl -fsSL https://github.com/coreos/etcd/releases/download/${ETCD_VERSION}/etcd-${ETCD_VERSION}-linux-amd64.tar.gz | tar -xz \
2 changes: 1 addition & 1 deletion build/build-image/cross/VERSION
@@ -1 +1 @@
v1.6.3-2
v1.6.3-5
3 changes: 2 additions & 1 deletion cluster/aws/config-default.sh
@@ -153,8 +153,9 @@ COREOS_CHANNEL="${COREOS_CHANNEL:-alpha}"
CONTAINER_RUNTIME="${KUBE_CONTAINER_RUNTIME:-docker}"
RKT_VERSION="${KUBE_RKT_VERSION:-0.5.5}"

NETWORK_PROVIDER="${NETWORK_PROVIDER:-kubenet}" # kubenet, opencontrail, flannel

# OpenContrail networking plugin specific settings
NETWORK_PROVIDER="${NETWORK_PROVIDER:-none}" # opencontrail
OPENCONTRAIL_TAG="${OPENCONTRAIL_TAG:-R2.20}"
OPENCONTRAIL_KUBERNETES_TAG="${OPENCONTRAIL_KUBERNETES_TAG:-master}"
OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}"
7 changes: 7 additions & 0 deletions cluster/aws/config-test.sh
@@ -139,5 +139,12 @@ COREOS_CHANNEL="${COREOS_CHANNEL:-alpha}"
CONTAINER_RUNTIME="${KUBE_CONTAINER_RUNTIME:-docker}"
RKT_VERSION="${KUBE_RKT_VERSION:-0.5.5}"

NETWORK_PROVIDER="${NETWORK_PROVIDER:-kubenet}" # kubenet, opencontrail, flannel

# OpenContrail networking plugin specific settings
OPENCONTRAIL_TAG="${OPENCONTRAIL_TAG:-R2.20}"
OPENCONTRAIL_KUBERNETES_TAG="${OPENCONTRAIL_KUBERNETES_TAG:-master}"
OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}"

# Optional: if set to true, kube-up will configure the cluster to run e2e tests.
E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false}
2 changes: 1 addition & 1 deletion cluster/centos/config-build.sh
@@ -23,7 +23,7 @@ RELEASES_DIR=${RELEASES_DIR:-/tmp/downloads}
FLANNEL_VERSION=${FLANNEL_VERSION:-"0.5.5"}

# Define etcd version to use.
ETCD_VERSION=${ETCD_VERSION:-"3.0.4"}
ETCD_VERSION=${ETCD_VERSION:-"2.3.7"}

# Define k8s version to use.
K8S_VERSION=${K8S_VERSION:-"1.1.1"}
2 changes: 1 addition & 1 deletion cluster/gce/coreos/kube-manifests/etcd-events.yaml
@@ -14,7 +14,7 @@ spec:
--listen-client-urls=http://127.0.0.1:4002
--data-dir=/var/etcd/data-events
1>>/var/log/etcd-events.log 2>&1
image: gcr.io/google_containers/etcd:3.0.4
image: gcr.io/google_containers/etcd:2.3.7
imagePullPolicy: IfNotPresent
livenessProbe:
httpGet:
2 changes: 1 addition & 1 deletion cluster/gce/coreos/kube-manifests/etcd.yaml
@@ -14,7 +14,7 @@ spec:
--listen-client-urls=http://127.0.0.1:2379
--data-dir=/var/etcd/data
1>>/var/log/etcd.log 2>&1
image: gcr.io/google_containers/etcd:3.0.4
image: gcr.io/google_containers/etcd:2.3.7
imagePullPolicy: IfNotPresent
livenessProbe:
httpGet:
2 changes: 1 addition & 1 deletion cluster/images/hyperkube/static-pods/etcd.json
@@ -10,7 +10,7 @@
"containers": [
{
"name": "etcd",
"image": "gcr.io/google_containers/etcd-ARCH:3.0.4",
"image": "gcr.io/google_containers/etcd-ARCH:2.3.7",
"command": [
"/usr/local/bin/etcd",
"--listen-client-urls=http://127.0.0.1:2379,http://127.0.0.1:4001",
@@ -31,11 +31,8 @@
"-c",
"./cluster-autoscaler --kubernetes=http://127.0.0.1:8080?inClusterConfig=f --v=4 {{params}} 1>>/var/log/cluster-autoscaler.log 2>&1"
],
# TODO: Make resource requirements depend on the size of the cluster
"resources": {
"limits": {
"cpu": "100m",
"memory": "300Mi"
},
"requests": {
"cpu": "20m",
"memory": "300Mi"
2 changes: 1 addition & 1 deletion cluster/saltbase/salt/etcd/etcd.manifest
@@ -28,7 +28,7 @@
"containers":[
{
"name": "etcd-container",
"image": "gcr.io/google_containers/etcd:{{ pillar.get('etcd_docker_tag', '3.0.4') }}",
"image": "gcr.io/google_containers/etcd:{{ pillar.get('etcd_docker_tag', '2.3.7') }}",
"resources": {
"requests": {
"cpu": {{ cpulimit }}
@@ -75,7 +75,7 @@
},
{
"name": "etcd-container",
"image": "gcr.io/google_containers/etcd:3.0.4",
"image": "gcr.io/google_containers/etcd:2.3.7",
"command": [
"/bin/sh",
"-c",
6 changes: 2 additions & 4 deletions cluster/saltbase/salt/l7-gcp/glbc.manifest
@@ -33,11 +33,9 @@ spec:
name: logfile
readOnly: false
resources:
# Request and limits are set to accomodate this pod alongside the other
# Request is set to accomodate this pod alongside the other
# master components on a single core master.
limits:
cpu: 100m
memory: 100Mi
# TODO: Make resource requirements depend on the size of the cluster
requests:
cpu: 10m
memory: 50Mi
4 changes: 1 addition & 3 deletions cluster/saltbase/salt/rescheduler/rescheduler.manifest
@@ -17,10 +17,8 @@ spec:
- mountPath: /var/log/rescheduler.log
name: logfile
readOnly: false
# TODO: Make resource requirements depend on the size of the cluster
resources:
limits:
cpu: 100m
memory: 300Mi
requests:
cpu: 10m
memory: 100Mi
2 changes: 1 addition & 1 deletion hack/lib/etcd.sh
@@ -16,7 +16,7 @@

# A set of helpers for starting/running etcd for tests

ETCD_VERSION=${ETCD_VERSION:-3.0.4}
ETCD_VERSION=${ETCD_VERSION:-2.3.7}
ETCD_HOST=${ETCD_HOST:-127.0.0.1}
ETCD_PORT=${ETCD_PORT:-2379}

25 changes: 14 additions & 11 deletions hack/test-update-storage-objects.sh
@@ -157,14 +157,15 @@ killApiServer
# We always perform offline migration, so we need to stop etcd.
#######################################################

kube::etcd::stop
TARGET_STORAGE="etcd3" \
DATA_DIRECTORY="${ETCD_DIR}" \
ETCD=$(which etcd) \
ETCDCTL=$(which etcdctl) \
ATTACHLEASE="${KUBE_OUTPUT_HOSTBIN}/attachlease" \
${KUBE_ROOT}/cluster/images/etcd/migrate-if-needed.sh
kube::etcd::start
# TODO: Uncomment once we support migration.
#kube::etcd::stop
#TARGET_STORAGE="etcd3" \
# DATA_DIRECTORY="${ETCD_DIR}" \
# ETCD=$(which etcd) \
# ETCDCTL=$(which etcdctl) \
# ATTACHLEASE="${KUBE_OUTPUT_HOSTBIN}/attachlease" \
# ${KUBE_ROOT}/cluster/images/etcd/migrate-if-needed.sh
#kube::etcd::start


#######################################################
@@ -175,7 +176,8 @@

KUBE_API_VERSIONS="${KUBE_NEW_API_VERSION},${KUBE_OLD_API_VERSION}"
RUNTIME_CONFIG="api/all=false,api/${KUBE_OLD_API_VERSION}=true,api/${KUBE_NEW_API_VERSION}=true"
startApiServer ${STORAGE_BACKEND_ETCD3} ${KUBE_NEW_STORAGE_VERSIONS} ${KUBE_STORAGE_MEDIA_TYPE_JSON}
# TODO: Switch to STORAGE_BACKEND_ETCD3 once we support it.
startApiServer ${STORAGE_BACKEND_ETCD2} ${KUBE_NEW_STORAGE_VERSIONS} ${KUBE_STORAGE_MEDIA_TYPE_JSON}

# Update etcd objects, so that will now be stored in the new api version.
kube::log::status "Updating storage versions in etcd"
@@ -190,7 +192,7 @@ for test in ${tests[@]}; do
new_storage_version=${test_data[5]}

kube::log::status "Verifying ${resource}/${namespace}/${name} has updated storage version ${new_storage_version} in etcd"
ETCDCTL_API=3 ${ETCDCTL} --endpoints="${ETCD_HOST}:${ETCD_PORT}" get "/${ETCD_PREFIX}/${resource}/${namespace}/${name}" | grep ${new_storage_version}
${ETCDCTL} --endpoints="${ETCD_HOST}:${ETCD_PORT}" get "/${ETCD_PREFIX}/${resource}/${namespace}/${name}" | grep ${new_storage_version}
done

killApiServer
@@ -206,7 +208,8 @@ RUNTIME_CONFIG="api/all=false,api/${KUBE_NEW_API_VERSION}=true"

# This seems to reduce flakiness.
sleep 1
startApiServer ${STORAGE_BACKEND_ETCD3} ${KUBE_NEW_STORAGE_VERSIONS} ${KUBE_STORAGE_MEDIA_TYPE_PROTOBUF}
# TODO: Switch to STORAGE_BACKEND_ETCD3 once we support it.
startApiServer ${STORAGE_BACKEND_ETCD2} ${KUBE_NEW_STORAGE_VERSIONS} ${KUBE_STORAGE_MEDIA_TYPE_PROTOBUF}

for test in ${tests[@]}; do
IFS=',' read -ra test_data <<<"$test"
82 changes: 59 additions & 23 deletions pkg/cloudprovider/providers/gce/gce.go
@@ -719,6 +719,37 @@ func (gce *GCECloud) EnsureLoadBalancer(clusterName string, apiService *api.Serv
glog.Infof("Target pool %v for Service %v/%v doesn't exist", loadBalancerName, apiService.Namespace, apiService.Name)
}

// Ensure health checks are created for this target pool to pass to createTargetPool for health check links
// Alternately, if the annotation on the service was removed, we need to recreate the target pool without
// health checks. This needs to be prior to the forwarding rule deletion below otherwise it is not possible
// to delete just the target pool or http health checks later.
var hcToCreate *compute.HttpHealthCheck
hcExisting, err := gce.GetHttpHealthCheck(loadBalancerName)
if err != nil && !isHTTPErrorCode(err, http.StatusNotFound) {
return nil, fmt.Errorf("Error checking HTTP health check %s: %v", loadBalancerName, err)
}
if path, healthCheckNodePort := apiservice.GetServiceHealthCheckPathPort(apiService); path != "" {
glog.V(4).Infof("service %v needs health checks on :%d/%s)", apiService.Name, healthCheckNodePort, path)
if err != nil {
// This logic exists to detect a transition for a pre-existing service and turn on
// the tpNeedsUpdate flag to delete/recreate fwdrule/tpool adding the health check
// to the target pool.
glog.V(2).Infof("Annotation %s=%s added to new or pre-existing service",
apiservice.AnnotationExternalTraffic,
apiservice.AnnotationValueExternalTrafficLocal)
tpNeedsUpdate = true
}
hcToCreate, err = gce.ensureHttpHealthCheck(loadBalancerName, path, healthCheckNodePort)
if err != nil {
return nil, fmt.Errorf("Failed to ensure health check for localized service %v on node port %v: %v", loadBalancerName, healthCheckNodePort, err)
}
} else {
glog.V(4).Infof("service %v does not need health checks", apiService.Name)
if err == nil {
glog.V(2).Infof("Deleting stale health checks for service %v LB %v", apiService.Name, loadBalancerName)
tpNeedsUpdate = true
}
}
// Now we get to some slightly more interesting logic.
// First, neither target pools nor forwarding rules can be updated in place -
// they have to be deleted and recreated.
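
The block added above decides, before any forwarding rule or target pool is torn down, whether an HTTP health check should exist for the service and whether the target pool must be rebuilt because the external-traffic annotation was added or removed. A minimal runnable sketch of that decision flow — using hypothetical condensed types, not the PR's actual GCE client or api helpers — could look like this:

package main

import "fmt"

// Hypothetical, condensed stand-ins for the PR's GCE and service types.
type healthCheck struct {
	Name string
	Path string
	Port int32
}

type service struct {
	Name            string
	HealthCheckPath string // empty when the external-traffic-local annotation is absent
	HealthCheckPort int32
}

// decideHealthCheck mirrors the decision above: if the service asks for node-local
// health checks, make sure one will exist (and rebuild the target pool when the
// annotation is newly added); if it no longer asks, flag the target pool for
// recreation so the stale check can be dropped later.
func decideHealthCheck(svc service, existing *healthCheck) (toCreate *healthCheck, tpNeedsUpdate bool) {
	if svc.HealthCheckPath != "" {
		if existing == nil {
			// Annotation added to a new or pre-existing service.
			tpNeedsUpdate = true
		}
		return &healthCheck{Name: svc.Name, Path: svc.HealthCheckPath, Port: svc.HealthCheckPort}, tpNeedsUpdate
	}
	if existing != nil {
		// Annotation removed: recreate the target pool without health checks.
		tpNeedsUpdate = true
	}
	return nil, tpNeedsUpdate
}

func main() {
	hc, update := decideHealthCheck(service{Name: "lb-1", HealthCheckPath: "/healthz", HealthCheckPort: 30303}, nil)
	fmt.Printf("create=%+v tpNeedsUpdate=%v\n", hc, update)
}
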
Expand All @@ -738,17 +769,16 @@ func (gce *GCECloud) EnsureLoadBalancer(clusterName string, apiService *api.Serv
}
if tpExists && tpNeedsUpdate {
// Generate the list of health checks for this target pool to pass to deleteTargetPool
var hc *compute.HttpHealthCheck
if path, _ := apiservice.GetServiceHealthCheckPathPort(apiService); path != "" {
var err error
hc, err = gce.GetHttpHealthCheck(loadBalancerName)
hcExisting, err = gce.GetHttpHealthCheck(loadBalancerName)
if err != nil && !isHTTPErrorCode(err, http.StatusNotFound) {
glog.Infof("Failed to retrieve health check %v:%v", loadBalancerName, err)
}
}

// Pass healthchecks to deleteTargetPool to cleanup health checks prior to cleaning up the target pool itself.
if err := gce.deleteTargetPool(loadBalancerName, gce.region, hc); err != nil {
if err := gce.deleteTargetPool(loadBalancerName, gce.region, hcExisting); err != nil {
return nil, fmt.Errorf("failed to delete existing target pool %s for load balancer update: %v", loadBalancerName, err)
}
glog.Infof("EnsureLoadBalancer(%v(%v)): deleted target pool", loadBalancerName, serviceName)
@@ -761,21 +791,13 @@ func (gce *GCECloud) EnsureLoadBalancer(clusterName string, apiService *api.Serv
if len(hosts) > maxTargetPoolCreateInstances {
createInstances = createInstances[:maxTargetPoolCreateInstances]
}

// Create health checks for this target pool to pass to createTargetPool for health check links
var hc *compute.HttpHealthCheck
if path, healthCheckNodePort := apiservice.GetServiceHealthCheckPathPort(apiService); path != "" {
glog.Infof("service %v needs health checks on :%d/%s)", apiService.Name, healthCheckNodePort, path)
var err error
hc, err = gce.ensureHttpHealthCheck(loadBalancerName, path, healthCheckNodePort)
if err != nil {
return nil, fmt.Errorf("Failed to create health check for localized service %v on node port %v: %v", loadBalancerName, healthCheckNodePort, err)
}
}
// Pass healthchecks to createTargetPool which needs them as health check links in the target pool
if err := gce.createTargetPool(loadBalancerName, serviceName.String(), gce.region, createInstances, affinityType, hc); err != nil {
if err := gce.createTargetPool(loadBalancerName, serviceName.String(), gce.region, createInstances, affinityType, hcToCreate); err != nil {
return nil, fmt.Errorf("failed to create target pool %s: %v", loadBalancerName, err)
}
if hcToCreate != nil {
glog.Infof("EnsureLoadBalancer(%v(%v)): created health checks for target pool", loadBalancerName, serviceName)
}
if len(hosts) <= maxTargetPoolCreateInstances {
glog.Infof("EnsureLoadBalancer(%v(%v)): created target pool", loadBalancerName, serviceName)
} else {
@@ -838,17 +860,21 @@ func (gce *GCECloud) ensureHttpHealthCheck(name, path string, port int32) (hc *c
glog.Errorf("Failed to get http health check %v", err)
return nil, err
}
glog.Infof("Created HTTP health check %v healthCheckNodePort: %d", name, port)
return hc, nil
}
// Validate health check fields
glog.V(4).Infof("Checking http health check params %s", name)
drift := hc.Port != int64(port) || hc.RequestPath != path || hc.Description != makeHealthCheckDescription(name)
drift = drift || hc.CheckIntervalSec != gceHcCheckIntervalSeconds || hc.TimeoutSec != gceHcTimeoutSeconds
drift = drift || hc.UnhealthyThreshold != gceHcUnhealthyThreshold || hc.HealthyThreshold != gceHcHealthyThreshold
if drift {
glog.Infof("Health check %v exists but parameters have drifted - updating", name)
glog.Warningf("Health check %v exists but parameters have drifted - updating...", name)
if err := gce.UpdateHttpHealthCheck(newHC); err != nil {
glog.Warningf("Failed to reconcile http health check %v parameters", name)
return nil, err
}
glog.V(4).Infof("Corrected health check %v parameters successful", name)
}
return hc, nil
}
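
The ensureHttpHealthCheck hunk above adds a log line on creation and reconciles an existing check whose parameters have drifted. A rough sketch of that get-create-or-reconcile pattern, with a toy in-memory API standing in for the real compute client (names and fields here are illustrative only):

package main

import (
	"errors"
	"fmt"
)

// errNotFound stands in for an HTTP 404 from the cloud API.
var errNotFound = errors.New("not found")

// Illustrative subset of the parameters the real code validates
// (port, request path, description, intervals, thresholds).
type httpHealthCheck struct {
	Port        int64
	RequestPath string
}

// fakeAPI is a stand-in for the Get/Create/UpdateHttpHealthCheck calls.
type fakeAPI struct{ checks map[string]httpHealthCheck }

func (a *fakeAPI) get(name string) (httpHealthCheck, error) {
	hc, ok := a.checks[name]
	if !ok {
		return httpHealthCheck{}, errNotFound
	}
	return hc, nil
}

func (a *fakeAPI) put(name string, hc httpHealthCheck) { a.checks[name] = hc }

// ensureHTTPHealthCheck follows the pattern in the hunk above: create the check
// when it is missing, otherwise detect parameter drift and reconcile in place.
func ensureHTTPHealthCheck(a *fakeAPI, name, path string, port int64) httpHealthCheck {
	want := httpHealthCheck{Port: port, RequestPath: path}
	got, err := a.get(name)
	if errors.Is(err, errNotFound) {
		a.put(name, want)
		fmt.Printf("Created HTTP health check %v healthCheckNodePort: %d\n", name, port)
		return want
	}
	if got != want {
		fmt.Printf("Health check %v exists but parameters have drifted - updating...\n", name)
		a.put(name, want)
	}
	return want
}

func main() {
	api := &fakeAPI{checks: map[string]httpHealthCheck{}}
	ensureHTTPHealthCheck(api, "lb-1", "/healthz", 30303) // creates
	ensureHTTPHealthCheck(api, "lb-1", "/healthz", 31000) // drift on port -> update
}
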
@@ -1421,13 +1447,6 @@ func (gce *GCECloud) deleteForwardingRule(name, region string) error {
}

func (gce *GCECloud) deleteTargetPool(name, region string, hc *compute.HttpHealthCheck) error {
if hc != nil {
glog.Infof("Deleting health check %v", hc.Name)
if err := gce.DeleteHttpHealthCheck(hc.Name); err != nil {
glog.Warningf("Failed to delete health check %v: %v", hc, err)
return err
}
}
op, err := gce.service.TargetPools.Delete(gce.projectID, region, name).Do()
if err != nil && isHTTPErrorCode(err, http.StatusNotFound) {
glog.Infof("Target pool %s already deleted. Continuing to delete other resources.", name)
@@ -1440,6 +1459,23 @@ func (gce *GCECloud) deleteTargetPool(name, region string, hc *compute.HttpHealt
return err
}
}
// Deletion of health checks is allowed only after the TargetPool reference is deleted
if hc != nil {
glog.Infof("Deleting health check %v", hc.Name)
if err := gce.DeleteHttpHealthCheck(hc.Name); err != nil {
glog.Warningf("Failed to delete health check %v: %v", hc, err)
return err
}
} else {
// This is a HC cleanup attempt to prevent stale HCs when errors are encountered
// during HC deletion in a prior pass through EnsureLoadBalancer.
// The HC name matches the load balancer name - normally this is expected to fail.
if err := gce.DeleteHttpHealthCheck(name); err == nil {
// We only print a warning if this deletion actually succeeded (which
// means there was indeed a stale health check with the LB name.
glog.Warningf("Deleted stale http health check for LB: %s", name)
}
}
return nil
}

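The deleteTargetPool hunks move health-check deletion to after the target pool deletion — the check can only be removed once no pool references it — and add a best-effort cleanup of a stale check named after the load balancer. A compressed sketch of that ordering, written against a hypothetical deleter interface rather than the real compute client:

package main

import "fmt"

// deleter abstracts the two GCE delete calls involved; purely illustrative.
type deleter interface {
	deleteTargetPool(name string) error
	deleteHealthCheck(name string) error
}

// deleteTargetPoolThenCheck mirrors the new ordering: the target pool goes first,
// because a health check cannot be deleted while a pool still references it; afterwards
// either the known check or a possibly stale check named after the load balancer is
// removed (the latter attempt is normally expected to fail with "not found").
func deleteTargetPoolThenCheck(d deleter, poolName, hcName string) error {
	if err := d.deleteTargetPool(poolName); err != nil {
		return err
	}
	if hcName != "" {
		return d.deleteHealthCheck(hcName)
	}
	if err := d.deleteHealthCheck(poolName); err == nil {
		fmt.Printf("Deleted stale http health check for LB: %s\n", poolName)
	}
	return nil
}

// fakeGCE is a toy in-memory implementation used only to make the sketch runnable.
type fakeGCE struct{ checks map[string]bool }

func (f fakeGCE) deleteTargetPool(name string) error { return nil }

func (f fakeGCE) deleteHealthCheck(name string) error {
	if !f.checks[name] {
		return fmt.Errorf("health check %q not found", name)
	}
	delete(f.checks, name)
	return nil
}

func main() {
	gce := fakeGCE{checks: map[string]bool{"lb-1": true}}
	_ = deleteTargetPoolThenCheck(gce, "lb-1", "") // removes the stale "lb-1" check
}
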
10 changes: 10 additions & 0 deletions pkg/proxy/iptables/proxier.go
@@ -346,6 +346,10 @@ func (proxier *Proxier) sameConfig(info *serviceInfo, service *api.Service, port
if info.sessionAffinityType != service.Spec.SessionAffinity {
return false
}
onlyNodeLocalEndpoints := apiservice.NeedsHealthCheck(service) && featuregate.DefaultFeatureGate.ExternalTrafficLocalOnly()
if info.onlyNodeLocalEndpoints != onlyNodeLocalEndpoints {
return false
}
return true
}

@@ -446,6 +450,9 @@ func (proxier *Proxier) OnServiceUpdate(allServices []api.Service) {
// Turn on healthcheck responder to listen on the health check nodePort
healthcheck.AddServiceListener(serviceName.NamespacedName, info.healthCheckNodePort)
}
} else {
// Delete healthcheck responders, if any, previously listening for this service
healthcheck.DeleteServiceListener(serviceName.NamespacedName, 0)
}
proxier.serviceMap[serviceName] = info

@@ -895,6 +902,9 @@ func (proxier *Proxier) syncProxyRules() {
writeLine(natChains, utiliptables.MakeChainLine(svcXlbChain))
}
activeNATChains[svcXlbChain] = true
} else if activeNATChains[svcXlbChain] {
// Cleanup the previously created XLB chain for this service
delete(activeNATChains, svcXlbChain)
}

// Capture the clusterIP.
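The proxier hunks make onlyNodeLocalEndpoints part of the "did the service config change?" comparison and tear down health-check listeners and XLB NAT chains when a service stops needing them. A small sketch of the extended comparison, using a hypothetical condensed serviceInfo rather than the proxier's real struct:

package main

import "fmt"

// serviceInfo is a hypothetical, condensed view of the proxier's per-service state.
type serviceInfo struct {
	clusterIP              string
	port                   int
	sessionAffinity        string
	onlyNodeLocalEndpoints bool
}

// sameConfig reports whether the stored state still matches the desired state;
// a false result makes the proxier rebuild the service entry, which is how a
// toggled external-traffic annotation now takes effect.
func sameConfig(stored, desired serviceInfo) bool {
	if stored.clusterIP != desired.clusterIP || stored.port != desired.port {
		return false
	}
	if stored.sessionAffinity != desired.sessionAffinity {
		return false
	}
	// New in this cherry pick: node-local-endpoint behaviour is part of the config.
	if stored.onlyNodeLocalEndpoints != desired.onlyNodeLocalEndpoints {
		return false
	}
	return true
}

func main() {
	stored := serviceInfo{clusterIP: "10.0.0.10", port: 80, sessionAffinity: "None"}
	desired := stored
	desired.onlyNodeLocalEndpoints = true      // annotation switched on
	fmt.Println(sameConfig(stored, desired))   // false -> service gets reprogrammed
}
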
2 changes: 1 addition & 1 deletion pkg/registry/generic/registry/store.go
@@ -599,9 +599,9 @@ func (e *Store) updateForGracefulDeletionAndFinalizers(ctx api.Context, name, ke
existingAccessor.SetFinalizers(newFinalizers)
}

pendingFinalizers = len(existingAccessor.GetFinalizers()) != 0
if !graceful {
// set the DeleteGracePeriods to 0 if the object has pendingFinalizers but not supporting graceful deletion
pendingFinalizers = len(existingAccessor.GetFinalizers()) != 0
if pendingFinalizers {
glog.V(6).Infof("update the DeletionTimestamp to \"now\" and GracePeriodSeconds to 0 for object %s, because it has pending finalizers", name)
err = markAsDeleting(existing)
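The store.go hunk hoists the pendingFinalizers computation out of the !graceful branch, presumably so the flag reflects the object's remaining finalizers on every path through the method rather than only the non-graceful one. A toy sketch of the corrected ordering, with invented types in place of the registry's accessors:

package main

import "fmt"

// object is a toy stand-in for the stored API object and its metadata accessor.
type object struct {
	finalizers         []string
	gracePeriodSeconds int64
	deleting           bool
}

// markForDeletion reproduces the corrected ordering: pendingFinalizers is computed
// up front, so it is available both to the non-graceful branch shown in the diff and
// to whatever follows it.
func markForDeletion(obj *object, graceful bool) string {
	pendingFinalizers := len(obj.finalizers) != 0
	if !graceful {
		if pendingFinalizers {
			// Keep the object around, but mark it as deleting with grace period 0.
			obj.deleting = true
			obj.gracePeriodSeconds = 0
			return "marked as deleting because of pending finalizers"
		}
		return "deleted immediately"
	}
	_ = pendingFinalizers // the real method also consults this flag on the graceful path
	return "graceful deletion in progress"
}

func main() {
	obj := &object{finalizers: []string{"example.com/protect"}}
	fmt.Println(markForDeletion(obj, false))
}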