Skip to content

Commit

Permalink
Introducing agentpool client
Browse files Browse the repository at this point in the history
  • Loading branch information
wenxuan0923 committed Apr 3, 2024
1 parent 46f04ae commit 6ee1b55
Show file tree
Hide file tree
Showing 284 changed files with 45,076 additions and 231 deletions.
157 changes: 157 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ import (
"os"
"time"

_ "go.uber.org/mock/mockgen/model"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm/policy"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud"
azurecore_policy "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
"github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2017-05-10/resources"
"github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2021-02-01/storage"
Expand Down Expand Up @@ -138,6 +147,145 @@ func (az *azDeploymentsClient) Delete(ctx context.Context, resourceGroupName, de
return future.Response(), err
}

//go:generate sh -c "mockgen k8s.io/autoscaler/cluster-autoscaler/cloudprovider/azure AgentPoolsClient >./mock_agentpool_client/agentpool_client.go"

// AgentPoolsClient interface defines the methods needed for scaling vms pool.
// it is implemented by track2 sdk armcontainerservice.AgentPoolsClient
type AgentPoolsClient interface {
Get(ctx context.Context,
resourceGroupName, resourceName, agentPoolName string,
options *armcontainerservice.AgentPoolsClientGetOptions) (
armcontainerservice.AgentPoolsClientGetResponse, error)
BeginCreateOrUpdate(
ctx context.Context,
resourceGroupName, resourceName, agentPoolName string,
parameters armcontainerservice.AgentPool,
options *armcontainerservice.AgentPoolsClientBeginCreateOrUpdateOptions) (
*runtime.Poller[armcontainerservice.AgentPoolsClientCreateOrUpdateResponse], error)
BeginDeleteMachines(
ctx context.Context,
resourceGroupName, resourceName, agentPoolName string,
machines armcontainerservice.AgentPoolDeleteMachinesParameter,
options *armcontainerservice.AgentPoolsClientBeginDeleteMachinesOptions) (
*runtime.Poller[armcontainerservice.AgentPoolsClientDeleteMachinesResponse], error)
}

const userAgent = "AKS-autoscaler"

func getAgentpoolClientCredentials(cfg *Config) (azcore.TokenCredential, error) {
var cred azcore.TokenCredential
var err error
if cfg.AuthMethod == authMethodCLI {
cred, err = azidentity.NewAzureCLICredential(&azidentity.AzureCLICredentialOptions{
TenantID: cfg.TenantID})
if err != nil {
klog.Errorf("NewAzureCLICredential failed: %v", err)
return nil, err
}
} else if cfg.AuthMethod == "" || cfg.AuthMethod == authMethodPrincipal {
cred, err = azidentity.NewClientSecretCredential(cfg.TenantID, cfg.AADClientID, cfg.AADClientSecret, nil)
if err != nil {
klog.Errorf("NewClientSecretCredential failed: %v", err)
return nil, err
}
} else {
return nil, fmt.Errorf("unsupported authorization method: %s", cfg.AuthMethod)
}
return cred, nil
}

func getAgentpoolClientRetryOptions(cfg *Config) azurecore_policy.RetryOptions {
if cfg.AuthMethod == authMethodCLI {
return azurecore_policy.RetryOptions{
MaxRetries: -1, // no retry when using CLI auth for UT
}
}
return azurecore_policy.RetryOptions{
MaxRetries: 5,
RetryDelay: 1 * time.Second,
MaxRetryDelay: 2 * time.Second,
TryTimeout: 10 * time.Second,
}
}

func newAgentpoolClientWithEndpoint(baseURL, subscriptionID string, retryOptions azurecore_policy.RetryOptions) (AgentPoolsClient, error) {
// for AKS managed CAS, we will use the ARMBaseURL to create a fake agent pool client
// so that the request will be sent to node provioner endpoint intead of the public ARM endpoint
agentPoolsClient, err := armcontainerservice.NewAgentPoolsClient(subscriptionID, nil,
&policy.ClientOptions{
ClientOptions: azurecore_policy.ClientOptions{
Cloud: cloud.Configuration{
Services: map[cloud.ServiceName]cloud.ServiceConfiguration{
cloud.ResourceManager: {
Endpoint: baseURL,
Audience: "UNKNOWN",
},
},
},
Telemetry: azurecore_policy.TelemetryOptions{
ApplicationID: userAgent,
},
Retry: retryOptions,
},
})

if err != nil {
return nil, fmt.Errorf("failed to init cluster agent pools client: %w", err)
}

klog.V(10).Infof("Successfully created agent pool client with ARMBaseURL")
return agentPoolsClient, nil
}

func newAgentpoolClient(cfg *Config) (AgentPoolsClient, error) {
retryOptions := getAgentpoolClientRetryOptions(cfg)

if cfg.ARMBaseURL != "" {
klog.V(10).Infof("Using ARMBaseURL to create agent pool client")
return newAgentpoolClientWithEndpoint(cfg.ARMBaseURL, cfg.SubscriptionID, retryOptions)
}

cred, err := getAgentpoolClientCredentials(cfg)
if err != nil {
klog.Errorf("failed to get agent pool client credentials: %v", err)
return nil, err
}

// default to public cloud
env := azure.PublicCloud
if cfg.Cloud != "" {
env, err = azure.EnvironmentFromName(cfg.Cloud)
if err != nil {
klog.Errorf("failed to get environment from name %s: with error: %v", cfg.Cloud, err)
return nil, err
}
}
agentPoolsClient, err := armcontainerservice.NewAgentPoolsClient(cfg.SubscriptionID, cred,
&policy.ClientOptions{
ClientOptions: azurecore_policy.ClientOptions{
Cloud: cloud.Configuration{
Services: map[cloud.ServiceName]cloud.ServiceConfiguration{
cloud.ResourceManager: {
Endpoint: env.ResourceManagerEndpoint,
Audience: env.TokenAudience,
},
},
},
Telemetry: azurecore_policy.TelemetryOptions{
ApplicationID: userAgent,
},
Retry: retryOptions,
},
})

if err != nil {
klog.Errorf("failed to init cluster agent pools client: %v", err)
return nil, fmt.Errorf("failed to init cluster agent pools client: %w", err)
}

return agentPoolsClient, nil
}

type azAccountsClient struct {
client storage.AccountsClient
}
Expand All @@ -151,6 +299,7 @@ type azClient struct {
disksClient diskclient.Interface
storageAccountsClient storageaccountclient.Interface
skuClient compute.ResourceSkusClient
agentPoolClient AgentPoolsClient
}

// newServicePrincipalTokenFromCredentials creates a new ServicePrincipalToken using values of the
Expand Down Expand Up @@ -278,6 +427,13 @@ func newAzClient(cfg *Config, env *azure.Environment) (*azClient, error) {
skuClient.Authorizer = azClientConfig.Authorizer
klog.V(5).Infof("Created sku client with authorizer: %v", skuClient)

agentPoolClient, err := newAgentpoolClient(cfg)
if err != nil {
// we don't want to fail the whole process so we don't break any existing functionality
// since this may not be fatal - it is only used by vms pool which is still under development.
klog.Warningf("newAgentpoolClient failed with error: %s", err)
}

return &azClient{
disksClient: disksClient,
interfacesClient: interfacesClient,
Expand All @@ -287,5 +443,6 @@ func newAzClient(cfg *Config, env *azure.Environment) (*azClient, error) {
virtualMachinesClient: virtualMachinesClient,
storageAccountsClient: storageAccountsClient,
skuClient: skuClient,
agentPoolClient: agentPoolClient,
}, nil
}
20 changes: 18 additions & 2 deletions cluster-autoscaler/cloudprovider/azure/azure_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,16 @@ type Config struct {
Location string `json:"location" yaml:"location"`
TenantID string `json:"tenantId" yaml:"tenantId"`
SubscriptionID string `json:"subscriptionId" yaml:"subscriptionId"`
ResourceGroup string `json:"resourceGroup" yaml:"resourceGroup"`
VMType string `json:"vmType" yaml:"vmType"`
ClusterName string `json:"clusterName" yaml:"clusterName"`
// ResourceGroup is the MC_ resource group where the nodes are located.
ResourceGroup string `json:"resourceGroup" yaml:"resourceGroup"`
// ClusterResourceGroup is the resource group where the cluster is located.
ClusterResourceGroup string `json:"clusterResourceGroup" yaml:"clusterResourceGroup"`
VMType string `json:"vmType" yaml:"vmType"`

// ARMBaseURL is the URL to use for operations for the VMs pool.
// It can override the default public ARM endpoint for VMs pool scale operations.
ARMBaseURL string `json:"armBaseURL" yaml:"armBaseURL"`

// AuthMethod determines how to authorize requests for the Azure
// cloud. Valid options are "principal" (= the traditional
Expand Down Expand Up @@ -294,6 +302,12 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
}
}
}

// always read the following from environment variables since azure.json doesn't have these fields
cfg.ClusterName = os.Getenv("CLUSTER_NAME")
cfg.ClusterResourceGroup = os.Getenv("ARM_CLUSTER_RESOURCE_GROUP")
cfg.ARMBaseURL = os.Getenv("ARM_BASE_URL")

cfg.TrimSpace()

if cloudProviderRateLimit := os.Getenv("CLOUD_PROVIDER_RATE_LIMIT"); cloudProviderRateLimit != "" {
Expand Down Expand Up @@ -460,7 +474,9 @@ func (cfg *Config) TrimSpace() {
cfg.Location = strings.TrimSpace(cfg.Location)
cfg.TenantID = strings.TrimSpace(cfg.TenantID)
cfg.SubscriptionID = strings.TrimSpace(cfg.SubscriptionID)
cfg.ClusterName = strings.TrimSpace(cfg.ClusterName)
cfg.ResourceGroup = strings.TrimSpace(cfg.ResourceGroup)
cfg.ClusterResourceGroup = strings.TrimSpace(cfg.ClusterResourceGroup)
cfg.VMType = strings.TrimSpace(cfg.VMType)
cfg.AADClientID = strings.TrimSpace(cfg.AADClientID)
cfg.AADClientSecret = strings.TrimSpace(cfg.AADClientSecret)
Expand Down
6 changes: 6 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,9 @@ func TestCreateAzureManagerWithNilConfig(t *testing.T) {
TenantID: "tenantId",
SubscriptionID: "subscriptionId",
ResourceGroup: "resourceGroup",
ClusterName: "mycluster",
ClusterResourceGroup: "myrg",
ARMBaseURL: "nodeprovisioner-svc.nodeprovisioner.svc.cluster.local",
VMType: "vmss",
AADClientID: "aadClientId",
AADClientSecret: "aadClientSecret",
Expand Down Expand Up @@ -446,6 +449,9 @@ func TestCreateAzureManagerWithNilConfig(t *testing.T) {
t.Setenv("BACKOFF_DURATION", "1")
t.Setenv("BACKOFF_JITTER", "1")
t.Setenv("CLOUD_PROVIDER_RATE_LIMIT", "true")
t.Setenv("CLUSTER_NAME", "mycluster")
t.Setenv("ARM_CLUSTER_RESOURCE_GROUP", "myrg")
t.Setenv("ARM_BASE_URL", "nodeprovisioner-svc.nodeprovisioner.svc.cluster.local")

t.Run("environment variables correctly set", func(t *testing.T) {
manager, err := createAzureManagerInternal(nil, cloudprovider.NodeGroupDiscoveryOptions{}, mockAzClient)
Expand Down
19 changes: 14 additions & 5 deletions cluster-autoscaler/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ go 1.21
require (
cloud.google.com/go/compute/metadata v0.2.3
github.com/Azure/azure-sdk-for-go v68.0.0+incompatible
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.2
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.4.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.8.0-beta.1
github.com/Azure/go-autorest/autorest v0.11.29
github.com/Azure/go-autorest/autorest/adal v0.9.23
github.com/Azure/go-autorest/autorest/azure/auth v0.5.8
Expand All @@ -18,7 +21,7 @@ require (
github.com/golang/mock v1.6.0
github.com/google/go-cmp v0.6.0
github.com/google/go-querystring v1.0.0
github.com/google/uuid v1.3.0
github.com/google/uuid v1.3.1
github.com/jmespath/go-jmespath v0.4.0
github.com/json-iterator/go v1.1.12
github.com/onsi/ginkgo/v2 v2.13.0
Expand All @@ -28,10 +31,11 @@ require (
github.com/satori/go.uuid v1.2.0
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.4
golang.org/x/crypto v0.16.0
golang.org/x/net v0.19.0
go.uber.org/mock v0.4.0
golang.org/x/crypto v0.19.0
golang.org/x/net v0.21.0
golang.org/x/oauth2 v0.10.0
golang.org/x/sys v0.15.0
golang.org/x/sys v0.17.0
google.golang.org/api v0.126.0
google.golang.org/grpc v1.58.3
google.golang.org/protobuf v1.33.0
Expand All @@ -58,12 +62,14 @@ require (

require (
cloud.google.com/go/compute v1.23.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.2 // indirect
github.com/Azure/go-autorest v14.2.0+incompatible // indirect
github.com/Azure/go-autorest/autorest/azure/cli v0.4.2 // indirect
github.com/Azure/go-autorest/autorest/mocks v0.4.2 // indirect
github.com/Azure/go-autorest/autorest/validation v0.3.1 // indirect
github.com/Azure/go-autorest/logger v0.2.1 // indirect
github.com/Azure/go-autorest/tracing v0.6.0 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1 // indirect
github.com/GoogleCloudPlatform/k8s-cloud-provider v1.18.1-0.20220218231025-f11817397a1b // indirect
github.com/JeffAshton/win_pdh v0.0.0-20161109143554-76bb4ee9f0ab // indirect
github.com/Microsoft/go-winio v0.6.0 // indirect
Expand Down Expand Up @@ -103,6 +109,7 @@ require (
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
github.com/golang-jwt/jwt/v5 v5.0.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/cadvisor v0.48.1 // indirect
Expand All @@ -120,6 +127,7 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/karrick/godirwalk v1.17.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/libopenstorage/openstorage v1.0.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
Expand All @@ -137,6 +145,7 @@ require (
github.com/opencontainers/runc v1.1.10 // indirect
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.44.0 // indirect
Expand Down Expand Up @@ -171,7 +180,7 @@ require (
golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect
golang.org/x/mod v0.14.0 // indirect
golang.org/x/sync v0.5.0 // indirect
golang.org/x/term v0.15.0 // indirect
golang.org/x/term v0.17.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/tools v0.16.1 // indirect
Expand Down
Loading

0 comments on commit 6ee1b55

Please sign in to comment.