Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GCE: Gracefully handle permission errors when attempting to create firewall rules #51562

Merged
merged 1 commit into from Sep 5, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions pkg/cloudprovider/providers/gce/BUILD
Expand Up @@ -76,7 +76,10 @@ go_library(
"//vendor/k8s.io/apiserver/pkg/server/options/encryptionconfig:go_default_library",
"//vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
],
)
Expand Down
16 changes: 16 additions & 0 deletions pkg/cloudprovider/providers/gce/gce.go
Expand Up @@ -30,10 +30,15 @@ import (

"cloud.google.com/go/compute/metadata"

"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/server/options/encryptionconfig"
"k8s.io/apiserver/pkg/storage/value/encrypt/envelope"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/flowcontrol"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/controller"
Expand Down Expand Up @@ -99,7 +104,10 @@ type GCECloud struct {
serviceAlpha *computealpha.Service
containerService *container.Service
cloudkmsService *cloudkms.Service
client clientset.Interface
clientBuilder controller.ControllerClientBuilder
eventBroadcaster record.EventBroadcaster
eventRecorder record.EventRecorder
projectID string
region string
localZone string // The zone in which we are running
Expand Down Expand Up @@ -531,6 +539,14 @@ func tryConvertToProjectNames(configProject, configNetworkProject string, servic
// This must be called before utilizing the funcs of gce.ClusterID
func (gce *GCECloud) Initialize(clientBuilder controller.ControllerClientBuilder) {
gce.clientBuilder = clientBuilder
gce.client = clientBuilder.ClientOrDie("cloud-provider")

if gce.OnXPN() {
gce.eventBroadcaster = record.NewBroadcaster()
gce.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(gce.client.Core().RESTClient()).Events("")})
gce.eventRecorder = gce.eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "gce-cloudprovider"})
}

go gce.watchClusterID()
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/cloudprovider/providers/gce/gce_clusterid.go
Expand Up @@ -62,7 +62,7 @@ type ClusterID struct {
func (gce *GCECloud) watchClusterID() {
gce.ClusterID = ClusterID{
cfgMapKey: fmt.Sprintf("%v/%v", UIDNamespace, UIDConfigMapName),
client: gce.clientBuilder.ClientOrDie("cloud-provider"),
client: gce.client,
}

mapEventHandler := cache.ResourceEventHandlerFuncs{
Expand Down
72 changes: 45 additions & 27 deletions pkg/cloudprovider/providers/gce/gce_loadbalancer_external.go
Expand Up @@ -181,13 +181,13 @@ func (gce *GCECloud) ensureExternalLoadBalancer(clusterName, clusterID string, a
// without needing to be deleted and recreated.
if firewallExists {
glog.Infof("EnsureLoadBalancer(%v(%v)): updating firewall", loadBalancerName, serviceName)
if err := gce.updateFirewall(makeFirewallName(loadBalancerName), gce.region, desc, sourceRanges, ports, hosts); err != nil {
if err := gce.updateFirewall(apiService, makeFirewallName(loadBalancerName), gce.region, desc, sourceRanges, ports, hosts); err != nil {
return nil, err
}
glog.Infof("EnsureLoadBalancer(%v(%v)): updated firewall", loadBalancerName, serviceName)
} else {
glog.Infof("EnsureLoadBalancer(%v(%v)): creating firewall", loadBalancerName, serviceName)
if err := gce.createFirewall(makeFirewallName(loadBalancerName), gce.region, desc, sourceRanges, ports, hosts); err != nil {
if err := gce.createFirewall(apiService, makeFirewallName(loadBalancerName), gce.region, desc, sourceRanges, ports, hosts); err != nil {
return nil, err
}
glog.Infof("EnsureLoadBalancer(%v(%v)): created firewall", loadBalancerName, serviceName)
Expand Down Expand Up @@ -259,7 +259,7 @@ func (gce *GCECloud) ensureExternalLoadBalancer(clusterName, clusterID string, a
if hcToDelete != nil {
hcNames = append(hcNames, hcToDelete.Name)
}
if err := gce.DeleteExternalTargetPoolAndChecks(loadBalancerName, gce.region, clusterID, hcNames...); err != nil {
if err := gce.DeleteExternalTargetPoolAndChecks(apiService, loadBalancerName, gce.region, clusterID, hcNames...); err != nil {
return nil, fmt.Errorf("failed to delete existing target pool %s for load balancer update: %v", loadBalancerName, err)
}
glog.Infof("EnsureLoadBalancer(%v(%v)): deleted target pool", loadBalancerName, serviceName)
Expand All @@ -273,7 +273,7 @@ func (gce *GCECloud) ensureExternalLoadBalancer(clusterName, clusterID string, a
createInstances = createInstances[:maxTargetPoolCreateInstances]
}
// Pass healthchecks to createTargetPool which needs them as health check links in the target pool
if err := gce.createTargetPool(loadBalancerName, serviceName.String(), ipAddressToUse, gce.region, clusterID, createInstances, affinityType, hcToCreate); err != nil {
if err := gce.createTargetPool(apiService, loadBalancerName, serviceName.String(), ipAddressToUse, gce.region, clusterID, createInstances, affinityType, hcToCreate); err != nil {
return nil, fmt.Errorf("failed to create target pool %s: %v", loadBalancerName, err)
}
if hcToCreate != nil {
Expand Down Expand Up @@ -355,7 +355,16 @@ func (gce *GCECloud) ensureExternalLoadBalancerDeleted(clusterName, clusterID st
}

errs := utilerrors.AggregateGoroutines(
func() error { return ignoreNotFound(gce.DeleteFirewall(makeFirewallName(loadBalancerName))) },
func() error {
fwName := makeFirewallName(loadBalancerName)
err := ignoreNotFound(gce.DeleteFirewall(fwName))
if isForbidden(err) && gce.OnXPN() {
glog.V(4).Infof("ensureExternalLoadBalancerDeleted(%v): do not have permission to delete firewall rule (on XPN). Raising event.", loadBalancerName)
gce.raiseFirewallChangeNeededEvent(service, FirewallToGCloudDeleteCmd(fwName, gce.NetworkProjectID()))
return nil
}
return err
},
// Even though we don't hold on to static IPs for load balancers, it's
// possible that EnsureLoadBalancer left one around in a failed
// creation/update attempt, so make sure we clean it up here just in case.
Expand All @@ -366,7 +375,7 @@ func (gce *GCECloud) ensureExternalLoadBalancerDeleted(clusterName, clusterID st
if err := ignoreNotFound(gce.DeleteRegionForwardingRule(loadBalancerName, gce.region)); err != nil {
return err
}
if err := gce.DeleteExternalTargetPoolAndChecks(loadBalancerName, gce.region, clusterID, hcNames...); err != nil {
if err := gce.DeleteExternalTargetPoolAndChecks(service, loadBalancerName, gce.region, clusterID, hcNames...); err != nil {
return err
}
return nil
Expand All @@ -378,7 +387,7 @@ func (gce *GCECloud) ensureExternalLoadBalancerDeleted(clusterName, clusterID st
return nil
}

func (gce *GCECloud) DeleteExternalTargetPoolAndChecks(name, region, clusterID string, hcNames ...string) error {
func (gce *GCECloud) DeleteExternalTargetPoolAndChecks(service *v1.Service, name, region, clusterID string, hcNames ...string) error {
if err := gce.DeleteTargetPool(name, region); err != nil && isHTTPErrorCode(err, http.StatusNotFound) {
glog.Infof("Target pool %s already deleted. Continuing to delete other resources.", name)
} else if err != nil {
Expand Down Expand Up @@ -420,9 +429,10 @@ func (gce *GCECloud) DeleteExternalTargetPoolAndChecks(name, region, clusterID s
// So we should delete the health check firewall as well.
fwName := MakeHealthCheckFirewallName(clusterID, hcName, isNodesHealthCheck)
glog.Infof("Deleting firewall %v.", fwName)
if err := gce.DeleteFirewall(fwName); err != nil {
if isHTTPErrorCode(err, http.StatusNotFound) {
glog.V(4).Infof("Firewall %v is already deleted.", fwName)
if err := ignoreNotFound(gce.DeleteFirewall(fwName)); err != nil {
if isForbidden(err) && gce.OnXPN() {
glog.V(4).Infof("DeleteExternalTargetPoolAndChecks(%v): do not have permission to delete firewall rule (on XPN). Raising event.", hcName)
gce.raiseFirewallChangeNeededEvent(service, FirewallToGCloudDeleteCmd(fwName, gce.NetworkProjectID()))
return nil
}
return err
Expand Down Expand Up @@ -486,7 +496,7 @@ func verifyUserRequestedIP(s CloudAddressService, region, requestedIP, fwdRuleIP
return false, fmt.Errorf("requested ip %q is neither static nor assigned to the LB", requestedIP)
}

func (gce *GCECloud) createTargetPool(name, serviceName, ipAddress, region, clusterID string, hosts []*gceInstance, affinityType v1.ServiceAffinity, hc *compute.HttpHealthCheck) error {
func (gce *GCECloud) createTargetPool(svc *v1.Service, name, serviceName, ipAddress, region, clusterID string, hosts []*gceInstance, affinityType v1.ServiceAffinity, hc *compute.HttpHealthCheck) error {
// health check management is coupled with targetPools to prevent leaks. A
// target pool is the only thing that requires a health check, so we delete
// associated checks on teardown, and ensure checks on setup.
Expand All @@ -499,10 +509,9 @@ func (gce *GCECloud) createTargetPool(name, serviceName, ipAddress, region, clus
gce.sharedResourceLock.Lock()
defer gce.sharedResourceLock.Unlock()
}
if !gce.OnXPN() {
if err := gce.ensureHttpHealthCheckFirewall(serviceName, ipAddress, region, clusterID, hosts, hc.Name, int32(hc.Port), isNodesHealthCheck); err != nil {
return err
}

if err := gce.ensureHttpHealthCheckFirewall(svc, serviceName, ipAddress, region, clusterID, hosts, hc.Name, int32(hc.Port), isNodesHealthCheck); err != nil {
return err
}
var err error
if hc, err = gce.ensureHttpHealthCheck(hc.Name, hc.RequestPath, int32(hc.Port)); err != nil || hc == nil {
Expand Down Expand Up @@ -751,11 +760,6 @@ func translateAffinityType(affinityType v1.ServiceAffinity) string {
}

func (gce *GCECloud) firewallNeedsUpdate(name, serviceName, region, ipAddress string, ports []v1.ServicePort, sourceRanges netsets.IPNet) (exists bool, needsUpdate bool, err error) {
if gce.OnXPN() {
glog.V(2).Infoln("firewallNeedsUpdate: Cluster is on XPN network - skipping firewall creation")
return false, false, nil
}

fw, err := gce.service.Firewalls.Get(gce.NetworkProjectID(), makeFirewallName(name)).Do()
if err != nil {
if isHTTPErrorCode(err, http.StatusNotFound) {
Expand Down Expand Up @@ -793,7 +797,7 @@ func (gce *GCECloud) firewallNeedsUpdate(name, serviceName, region, ipAddress st
return true, false, nil
}

func (gce *GCECloud) ensureHttpHealthCheckFirewall(serviceName, ipAddress, region, clusterID string, hosts []*gceInstance, hcName string, hcPort int32, isNodesHealthCheck bool) error {
func (gce *GCECloud) ensureHttpHealthCheckFirewall(svc *v1.Service, serviceName, ipAddress, region, clusterID string, hosts []*gceInstance, hcName string, hcPort int32, isNodesHealthCheck bool) error {
// Prepare the firewall params for creating / checking.
desc := fmt.Sprintf(`{"kubernetes.io/cluster-id":"%s"}`, clusterID)
if !isNodesHealthCheck {
Expand All @@ -809,7 +813,7 @@ func (gce *GCECloud) ensureHttpHealthCheckFirewall(serviceName, ipAddress, regio
return fmt.Errorf("error getting firewall for health checks: %v", err)
}
glog.Infof("Creating firewall %v for health checks.", fwName)
if err := gce.createFirewall(fwName, region, desc, sourceRanges, ports, hosts); err != nil {
if err := gce.createFirewall(svc, fwName, region, desc, sourceRanges, ports, hosts); err != nil {
return err
}
glog.Infof("Created firewall %v for health checks.", fwName)
Expand All @@ -822,7 +826,7 @@ func (gce *GCECloud) ensureHttpHealthCheckFirewall(serviceName, ipAddress, regio
!equalStringSets(fw.Allowed[0].Ports, []string{strconv.Itoa(int(ports[0].Port))}) ||
!equalStringSets(fw.SourceRanges, sourceRanges.StringSlice()) {
glog.Warningf("Firewall %v exists but parameters have drifted - updating...", fwName)
if err := gce.updateFirewall(fwName, region, desc, sourceRanges, ports, hosts); err != nil {
if err := gce.updateFirewall(svc, fwName, region, desc, sourceRanges, ports, hosts); err != nil {
glog.Warningf("Failed to reconcile firewall %v parameters.", fwName)
return err
}
Expand Down Expand Up @@ -870,24 +874,38 @@ func createForwardingRule(s CloudForwardingRuleService, name, serviceName, regio
return nil
}

func (gce *GCECloud) createFirewall(name, region, desc string, sourceRanges netsets.IPNet, ports []v1.ServicePort, hosts []*gceInstance) error {
func (gce *GCECloud) createFirewall(svc *v1.Service, name, region, desc string, sourceRanges netsets.IPNet, ports []v1.ServicePort, hosts []*gceInstance) error {
firewall, err := gce.firewallObject(name, region, desc, sourceRanges, ports, hosts)
if err != nil {
return err
}
if err = gce.CreateFirewall(firewall); err != nil && !isHTTPErrorCode(err, http.StatusConflict) {
if err = gce.CreateFirewall(firewall); err != nil {
if isHTTPErrorCode(err, http.StatusConflict) {
return nil
} else if isForbidden(err) && gce.OnXPN() {
glog.V(4).Infof("createFirewall(%v): do not have permission to create firewall rule (on XPN). Raising event.", firewall.Name)
gce.raiseFirewallChangeNeededEvent(svc, FirewallToGCloudCreateCmd(firewall, gce.NetworkProjectID()))
return nil
}
return err
}
return nil
}

func (gce *GCECloud) updateFirewall(name, region, desc string, sourceRanges netsets.IPNet, ports []v1.ServicePort, hosts []*gceInstance) error {
func (gce *GCECloud) updateFirewall(svc *v1.Service, name, region, desc string, sourceRanges netsets.IPNet, ports []v1.ServicePort, hosts []*gceInstance) error {
firewall, err := gce.firewallObject(name, region, desc, sourceRanges, ports, hosts)
if err != nil {
return err
}

if err = gce.UpdateFirewall(firewall); err != nil && !isHTTPErrorCode(err, http.StatusConflict) {
if err = gce.UpdateFirewall(firewall); err != nil {
if isHTTPErrorCode(err, http.StatusConflict) {
return nil
} else if isForbidden(err) && gce.OnXPN() {
glog.V(4).Infof("updateFirewall(%v): do not have permission to update firewall rule (on XPN). Raising event.", firewall.Name)
gce.raiseFirewallChangeNeededEvent(svc, FirewallToGCloudUpdateCmd(firewall, gce.NetworkProjectID()))
return nil
}
return err
}
return nil
Expand Down