Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict Azure NSG rules to allow external access only to load balancer IP #54177

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
105 changes: 62 additions & 43 deletions pkg/cloudprovider/providers/azure/azure_loadbalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,42 +134,6 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod
serviceName := getServiceName(service)
glog.V(5).Infof("ensure(%s): START clusterName=%q lbName=%q", serviceName, clusterName, lbName)

az.operationPollRateLimiter.Accept()
glog.V(10).Infof("SecurityGroupsClient.Get(%q): start", az.SecurityGroupName)
sg, err := az.SecurityGroupsClient.Get(az.ResourceGroup, az.SecurityGroupName, "")
glog.V(10).Infof("SecurityGroupsClient.Get(%q): end", az.SecurityGroupName)
if err != nil {
return nil, err
}
sg, sgNeedsUpdate, err := az.reconcileSecurityGroup(sg, clusterName, service, true /* wantLb */)
if err != nil {
return nil, err
}
if sgNeedsUpdate {
glog.V(3).Infof("ensure(%s): sg(%s) - updating", serviceName, *sg.Name)
// azure-sdk-for-go introduced contraint validation which breaks the updating here if we don't set these
// to nil. This is a workaround until https://github.com/Azure/go-autorest/issues/112 is fixed
sg.SecurityGroupPropertiesFormat.NetworkInterfaces = nil
sg.SecurityGroupPropertiesFormat.Subnets = nil
az.operationPollRateLimiter.Accept()
glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): start", *sg.Name)
respChan, errChan := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil)
resp := <-respChan
err := <-errChan
glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): end", *sg.Name)
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp.Response, err) {
glog.V(2).Infof("ensure(%s) backing off: sg(%s) - updating", serviceName, *sg.Name)
retryErr := az.CreateOrUpdateSGWithRetry(sg)
if retryErr != nil {
glog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name)
return nil, retryErr
}
}
if err != nil {
return nil, err
}
}

lb, existsLb, err := az.getAzureLoadBalancer(lbName)
if err != nil {
return nil, err
Expand Down Expand Up @@ -257,6 +221,50 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod
}
}

var lbStatus *v1.LoadBalancerStatus
if lbIP == nil {
lbStatus, exists, err := az.GetLoadBalancer(clusterName, service)
if err != nil {
return nil, err
}
if !exists {
return nil, fmt.Errorf("ensure(%s): lb(%s) - failed to get back load balancer", serviceName, lbName)
}
lbIP = &lbStatus.Ingress[0].IP
}

az.operationPollRateLimiter.Accept()
glog.V(10).Infof("SecurityGroupsClient.Get(%q): start", az.SecurityGroupName)
sg, err := az.SecurityGroupsClient.Get(az.ResourceGroup, az.SecurityGroupName, "")
glog.V(10).Infof("SecurityGroupsClient.Get(%q): end", az.SecurityGroupName)
if err != nil {
return nil, err
}
sg, sgNeedsUpdate, err := az.reconcileSecurityGroup(sg, clusterName, service, lbIP, true /* wantLb */)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does locking down to the lbIP affect the delivery of the external IP to the node? The LB is set to "enableFloatingIP": true, and I believe this drops the external IP address on the box.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've tested the following scenarios:

  • Single node so the request is guaranteed to land on the right node
  • Multiple nodes so kubeproxy has to forward the request to the right node
  • Initiating a request from within the cluster to exercise the Node->LB->Node route

All of these are working, though I've only tested interactively.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From our conversation I think this addresses the routing cases that you wanted to check - let me know if I'm still not covering the bases!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also check to ensure that the source IP preservation still works with this:

https://kubernetes.io/docs/tutorials/services/source-ip/

Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran a test using the source-ip-app tool from the page you linked. Without the externalTrafficPolicy: Local setting, the tool reported client_address=10.244.0.1. With the externalTrafficPolicy: Local setting, the tool reported client_address=167.220.242.83. So I think source IP preservation is still okay. Thanks for flagging it!

(I'm still not sure how to automate this testing though. It feels like our testing matrix is getting more complex by the day and ad hoc hand testing won't cut it forever...)

if err != nil {
return nil, err
}
if sgNeedsUpdate {
glog.V(3).Infof("ensure(%s): sg(%s) - updating", serviceName, *sg.Name)
az.operationPollRateLimiter.Accept()
glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): start", *sg.Name)
respChan, errChan := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil)
resp := <-respChan
err := <-errChan
glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): end", *sg.Name)
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp.Response, err) {
glog.V(2).Infof("ensure(%s) backing off: sg(%s) - updating", serviceName, *sg.Name)
retryErr := az.CreateOrUpdateSGWithRetry(sg)
if retryErr != nil {
glog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name)
return nil, retryErr
}
}
if err != nil {
return nil, err
}
}

// Add the machines to the backend pool if they're not already
lbBackendName := getBackendPoolName(clusterName)
lbBackendPoolID := az.getBackendPoolID(lbName, lbBackendName)
Expand All @@ -280,6 +288,10 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod

glog.V(2).Infof("ensure(%s): lb(%s) finished", serviceName, lbName)

if lbStatus != nil {
return lbStatus, nil
}

if lbIP == nil {
lbStatus, exists, err := az.GetLoadBalancer(clusterName, service)
if err != nil {
Expand Down Expand Up @@ -325,16 +337,12 @@ func (az *Cloud) EnsureLoadBalancerDeleted(clusterName string, service *v1.Servi
return err
}
if existsSg {
reconciledSg, sgNeedsUpdate, reconcileErr := az.reconcileSecurityGroup(sg, clusterName, service, false /* wantLb */)
reconciledSg, sgNeedsUpdate, reconcileErr := az.reconcileSecurityGroup(sg, clusterName, service, nil, false /* wantLb */)
if reconcileErr != nil {
return reconcileErr
}
if sgNeedsUpdate {
glog.V(3).Infof("delete(%s): sg(%s) - updating", serviceName, az.SecurityGroupName)
// azure-sdk-for-go introduced contraint validation which breaks the updating here if we don't set these
// to nil. This is a workaround until https://github.com/Azure/go-autorest/issues/112 is fixed
sg.SecurityGroupPropertiesFormat.NetworkInterfaces = nil
sg.SecurityGroupPropertiesFormat.Subnets = nil
az.operationPollRateLimiter.Accept()
glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): start", *reconciledSg.Name)
respChan, errChan := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *reconciledSg.Name, reconciledSg, nil)
Expand Down Expand Up @@ -784,7 +792,7 @@ func (az *Cloud) reconcileLoadBalancer(lb network.LoadBalancer, fipConfiguration

// This reconciles the Network Security Group similar to how the LB is reconciled.
// This entails adding required, missing SecurityRules and removing stale rules.
func (az *Cloud) reconcileSecurityGroup(sg network.SecurityGroup, clusterName string, service *v1.Service, wantLb bool) (network.SecurityGroup, bool, error) {
func (az *Cloud) reconcileSecurityGroup(sg network.SecurityGroup, clusterName string, service *v1.Service, lbIP *string, wantLb bool) (network.SecurityGroup, bool, error) {
serviceName := getServiceName(service)
var ports []v1.ServicePort
if wantLb {
Expand All @@ -793,6 +801,17 @@ func (az *Cloud) reconcileSecurityGroup(sg network.SecurityGroup, clusterName st
ports = []v1.ServicePort{}
}

destinationIPAddress := ""
if wantLb {
if lbIP == nil {
return sg, false, fmt.Errorf("No load balancer IP for setting up security rules for service %s", service.Name)
}
destinationIPAddress = *lbIP
}
if destinationIPAddress == "" {
destinationIPAddress = "*"
}

sourceRanges, err := serviceapi.GetLoadBalancerSourceRanges(service)
if err != nil {
return sg, false, err
Expand Down Expand Up @@ -824,7 +843,7 @@ func (az *Cloud) reconcileSecurityGroup(sg network.SecurityGroup, clusterName st
SourcePortRange: to.StringPtr("*"),
DestinationPortRange: to.StringPtr(strconv.Itoa(int(port.Port))),
SourceAddressPrefix: to.StringPtr(sourceAddressPrefixes[j]),
DestinationAddressPrefix: to.StringPtr("*"),
DestinationAddressPrefix: to.StringPtr(destinationIPAddress),
Access: network.SecurityRuleAccessAllow,
Direction: network.SecurityRuleDirectionInbound,
},
Expand Down
10 changes: 5 additions & 5 deletions pkg/cloudprovider/providers/azure/azure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ func TestReconcileSecurityGroupNewServiceAddsPort(t *testing.T) {

sg := getTestSecurityGroup()

sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svc1, true)
sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svc1, to.StringPtr("192.168.0.0"), true)
if err != nil {
t.Errorf("Unexpected error: %q", err)
}
Expand All @@ -313,7 +313,7 @@ func TestReconcileSecurityGroupNewInternalServiceAddsPort(t *testing.T) {

sg := getTestSecurityGroup()

sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svc1, true)
sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svc1, to.StringPtr("192.168.0.0"), true)
if err != nil {
t.Errorf("Unexpected error: %q", err)
}
Expand All @@ -329,7 +329,7 @@ func TestReconcileSecurityGroupRemoveService(t *testing.T) {

validateSecurityGroup(t, sg, service1, service2)
az := getTestCloud()
sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &service1, false)
sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &service1, nil, false)
if err != nil {
t.Errorf("Unexpected error: %q", err)
}
Expand All @@ -344,7 +344,7 @@ func TestReconcileSecurityGroupRemoveServiceRemovesPort(t *testing.T) {
sg := getTestSecurityGroup(svc)

svcUpdated := getTestService("servicea", v1.ProtocolTCP, 80)
sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svcUpdated, true)
sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svcUpdated, to.StringPtr("192.168.0.0"), true)
if err != nil {
t.Errorf("Unexpected error: %q", err)
}
Expand All @@ -361,7 +361,7 @@ func TestReconcileSecurityWithSourceRanges(t *testing.T) {
}

sg := getTestSecurityGroup(svc)
sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svc, true)
sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svc, to.StringPtr("192.168.0.0"), true)
if err != nil {
t.Errorf("Unexpected error: %q", err)
}
Expand Down