Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 0 additions & 18 deletions cli/internal/install/cloudinstall/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v7"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v7"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources/v3"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions"
"github.com/fatih/color"
Expand Down Expand Up @@ -105,17 +104,6 @@ func (inst *Installer) InstallCloud(ctx context.Context, skipShared bool) (err e
return install.ErrAlreadyLoggedError
}

if inst.Config.Cloud.PrivateNetworking {
for _, cluster := range inst.Config.Cloud.Compute.Clusters {
if cluster.ExistingSubnet != nil {
if err := inst.ensureResourceGroupCreated(ctx, cluster.ExistingSubnet.PrivateLinkResourceGroup); err != nil {
logError(ctx, err, "")
return install.ErrAlreadyLoggedError
}
}
}
}

if err := inst.preflightCheck(ctx); err != nil {
if !errors.Is(err, install.ErrAlreadyLoggedError) {
logError(ctx, err, "")
Expand Down Expand Up @@ -220,12 +208,6 @@ func (inst *Installer) UninstallCloud(ctx context.Context, all bool) error {
}
}

if inst.Config.Cloud.PrivateNetworking {
inst.forEachVnet(ctx, func(ctx context.Context, vnet *armnetwork.VirtualNetwork, subnet *armnetwork.Subnet, configSubnet *SubnetReference) error {
return inst.safeDeleteResourceGroup(ctx, configSubnet.PrivateLinkResourceGroup)
})
}

if inst.Config.Cloud.TlsCertificate != nil && inst.Config.Cloud.TlsCertificate.KeyVault != nil {
kvIdentityResp, err := inst.getTraefikKeyVaultClientManagedIdentity(ctx)
if err != nil {
Expand Down
13 changes: 6 additions & 7 deletions cli/internal/install/cloudinstall/cloudconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,11 @@ type ClusterConfig struct {
}

type SubnetReference struct {
ResourceGroup string `yaml:"resourceGroup"`
VNetName string `yaml:"vnetName"`
SubnetName string `yaml:"subnetName"`
PrivateLinkResourceGroup string `yaml:"-"`
VNetResourceId string `yaml:"-"`
SubnetResourceId string `yaml:"-"`
ResourceGroup string `yaml:"resourceGroup"`
VNetName string `yaml:"vnetName"`
SubnetName string `yaml:"subnetName"`
VNetResourceId string `yaml:"-"`
SubnetResourceId string `yaml:"-"`
}

type NodePoolConfig struct {
Expand Down Expand Up @@ -380,7 +379,7 @@ func RenderConfig(templateValues ConfigTemplateValues, writer io.Writer) error {
},
{
Name: "gpunp",
VMSize: "Standard_NC6s_v3",
VMSize: "Standard_NC24ads_A100_v4",
MinCount: templateValues.GpuNodePoolMinCount,
MaxCount: 10,
}},
Expand Down
4 changes: 2 additions & 2 deletions cli/internal/install/cloudinstall/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,15 @@ func (inst *Installer) assignDnsRecord(ctx context.Context, org *OrganizationCon
return fmt.Errorf("failed to create network interfaces client: %w", err)
}

nic, err := interfacesClient.Get(ctx, configSubnet.PrivateLinkResourceGroup, "traefik-pe-nic", nil)
nic, err := interfacesClient.Get(ctx, inst.Config.Cloud.ResourceGroup, "traefik-pe-nic", nil)
if err != nil {
return fmt.Errorf("failed to get network interface: %w", err)
}

ipAddress = *nic.Properties.IPConfigurations[0].Properties.PrivateIPAddress
}

return inst.createPrivateDnsZoneWithRecord(ctx, org.Api.DomainName, ipAddress, configSubnet)
return inst.createPrivateDnsZoneWithRecord(ctx, org.Cloud.ResourceGroup, org.Api.DomainName, ipAddress, configSubnet)
})
}

Expand Down
48 changes: 24 additions & 24 deletions cli/internal/install/cloudinstall/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ import (
"github.com/rs/zerolog/log"
)

func (inst *Installer) createPrivateEndpointsForStorageAccount(ctx context.Context, targetResource *armstorage.Account) error {
func (inst *Installer) createPrivateEndpointsForStorageAccount(ctx context.Context, resourceGroup string, targetResource *armstorage.Account) error {
return inst.forEachVnet(ctx, func(ctx context.Context, vnet *armnetwork.VirtualNetwork, subnet *armnetwork.Subnet, configSubnet *SubnetReference) error {
return inst.createPrivateEndpoints(ctx, fmt.Sprintf("storage-%s-pe", *targetResource.Name), *targetResource.ID, []*string{Ptr("blob")}, fmt.Sprintf("%s.privatelink.blob.core.windows.net", *targetResource.Name), vnet, subnet, configSubnet)
return inst.createPrivateEndpoints(ctx, resourceGroup, fmt.Sprintf("storage-%s-pe", *targetResource.Name), *targetResource.ID, []*string{Ptr("blob")}, fmt.Sprintf("%s.privatelink.blob.core.windows.net", *targetResource.Name), vnet, subnet, configSubnet)
})
}

func (inst *Installer) createPrivateEndpointsForPostgresFlexibleServer(ctx context.Context, targetResource *armpostgresqlflexibleservers.Server) error {
return inst.forEachVnet(ctx, func(ctx context.Context, vnet *armnetwork.VirtualNetwork, subnet *armnetwork.Subnet, configSubnet *SubnetReference) error {
return inst.createPrivateEndpoints(ctx, fmt.Sprintf("postgres-%s-pe", *targetResource.Name), *targetResource.ID, []*string{Ptr("postgresqlServer")}, fmt.Sprintf("%s.privatelink.postgres.database.azure.com", *targetResource.Name), vnet, subnet, configSubnet)
return inst.createPrivateEndpoints(ctx, inst.Config.Cloud.ResourceGroup, fmt.Sprintf("postgres-%s-pe", *targetResource.Name), *targetResource.ID, []*string{Ptr("postgresqlServer")}, fmt.Sprintf("%s.privatelink.postgres.database.azure.com", *targetResource.Name), vnet, subnet, configSubnet)
})
}

Expand All @@ -53,11 +53,11 @@ func (inst *Installer) createPrivateEndpointsForTraefik(ctx context.Context, clu
return fmt.Errorf("failed to get private link service for Traefik: %w", err)
}

return inst.createPrivateEndpoints(ctx, "traefik-pe", *plService.ID, []*string{}, "", vnet, subnet, configSubnet)
return inst.createPrivateEndpoints(ctx, inst.Config.Cloud.ResourceGroup, "traefik-pe", *plService.ID, []*string{}, "", vnet, subnet, configSubnet)
})
}

func (inst *Installer) createPrivateEndpoints(ctx context.Context, privateEndpointName string, targetResourceId string, groupIds []*string, privateDnsZoneName string, vnet *armnetwork.VirtualNetwork, subnet *armnetwork.Subnet, configSubnet *SubnetReference) error {
func (inst *Installer) createPrivateEndpoints(ctx context.Context, resourceGroup string, privateEndpointName string, targetResourceId string, groupIds []*string, privateDnsZoneName string, vnet *armnetwork.VirtualNetwork, subnet *armnetwork.Subnet, configSubnet *SubnetReference) error {
nicName := fmt.Sprintf("%s-nic", privateEndpointName)

privateEndpoint := armnetwork.PrivateEndpoint{
Expand Down Expand Up @@ -93,7 +93,7 @@ func (inst *Installer) createPrivateEndpoints(ctx context.Context, privateEndpoi
return fmt.Errorf("failed to create private endpoint client: %w", err)
}

poller, err := privateEndpointClient.BeginCreateOrUpdate(ctx, configSubnet.PrivateLinkResourceGroup, privateEndpointName, privateEndpoint, nil)
poller, err := privateEndpointClient.BeginCreateOrUpdate(ctx, resourceGroup, privateEndpointName, privateEndpoint, nil)
if err != nil {
return fmt.Errorf("failed to create private endpoint: %w", err)
}
Expand All @@ -109,12 +109,12 @@ func (inst *Installer) createPrivateEndpoints(ctx context.Context, privateEndpoi
return fmt.Errorf("failed to create network interfaces client: %w", err)
}

nic, err := interfacesClient.Get(ctx, configSubnet.PrivateLinkResourceGroup, nicName, nil)
nic, err := interfacesClient.Get(ctx, resourceGroup, nicName, nil)
if err != nil {
return fmt.Errorf("failed to get network interface '%s': %w", nicName, err)
}

if err := inst.createPrivateDnsZoneWithRecord(ctx, privateDnsZoneName, *nic.Properties.IPConfigurations[0].Properties.PrivateIPAddress, configSubnet); err != nil {
if err := inst.createPrivateDnsZoneWithRecord(ctx, resourceGroup, privateDnsZoneName, *nic.Properties.IPConfigurations[0].Properties.PrivateIPAddress, configSubnet); err != nil {
return fmt.Errorf("failed to create private DNS zone: %w", err)
}
}
Expand All @@ -129,11 +129,11 @@ func (inst *Installer) createPrivateEndpoints(ctx context.Context, privateEndpoi
func (inst *Installer) createAksPrivateDnsZone(ctx context.Context, location string, clusterName string, subnet *SubnetReference) (string, error) {
h := sha256.Sum256([]byte(inst.Config.Cloud.SubscriptionID + "/" + inst.Config.Cloud.ResourceGroup + "/" + clusterName))
zoneName := fmt.Sprintf("%s%s.privatelink.%s.azmk8s.io", inst.Config.EnvironmentName, hex.EncodeToString(h[:4]), location)
return inst.createPrivateDnsZone(ctx, zoneName, subnet)
return inst.createPrivateDnsZone(ctx, inst.Config.Cloud.ResourceGroup, zoneName, subnet)
}

func (inst *Installer) createPrivateDnsZoneWithRecord(ctx context.Context, domainName string, ipAddress string, subnet *SubnetReference) error {
_, err := inst.createPrivateDnsZone(ctx, domainName, subnet)
func (inst *Installer) createPrivateDnsZoneWithRecord(ctx context.Context, resourceGroup string, domainName string, ipAddress string, subnet *SubnetReference) error {
_, err := inst.createPrivateDnsZone(ctx, resourceGroup, domainName, subnet)
if err != nil {
return err
}
Expand All @@ -143,7 +143,7 @@ func (inst *Installer) createPrivateDnsZoneWithRecord(ctx context.Context, domai
return fmt.Errorf("failed to create record sets client: %w", err)
}

_, err = recordSetsClient.CreateOrUpdate(ctx, subnet.PrivateLinkResourceGroup, domainName, armprivatedns.RecordTypeA, "@",
_, err = recordSetsClient.CreateOrUpdate(ctx, resourceGroup, domainName, armprivatedns.RecordTypeA, "@",
armprivatedns.RecordSet{Properties: &armprivatedns.RecordSetProperties{
ARecords: []*armprivatedns.ARecord{
{
Expand All @@ -162,7 +162,7 @@ func (inst *Installer) createPrivateDnsZoneWithRecord(ctx context.Context, domai

// createPrivateDnsZone creates a private DNS zone, links it to the spoke VNet
// and any additional VNets, removes stale links, and returns the zone's resource ID.
func (inst *Installer) createPrivateDnsZone(ctx context.Context, zoneName string, subnet *SubnetReference) (string, error) {
func (inst *Installer) createPrivateDnsZone(ctx context.Context, resourceGroup string, zoneName string, subnet *SubnetReference) (string, error) {
privateDNSZoneClient, err := armprivatedns.NewPrivateZonesClient(inst.Config.Cloud.SubscriptionID, inst.Credential, nil)
if err != nil {
return "", fmt.Errorf("failed to create private DNS zone client: %w", err)
Expand All @@ -177,7 +177,7 @@ func (inst *Installer) createPrivateDnsZone(ctx context.Context, zoneName string

log.Ctx(ctx).Info().Msgf("Creating or updating private DNS zone '%s'", zoneName)

dnsZonePoller, err := privateDNSZoneClient.BeginCreateOrUpdate(ctx, subnet.PrivateLinkResourceGroup, zoneName, armprivatedns.PrivateZone{
dnsZonePoller, err := privateDNSZoneClient.BeginCreateOrUpdate(ctx, resourceGroup, zoneName, armprivatedns.PrivateZone{
Location: Ptr("global"),
Tags: tags,
}, nil)
Expand Down Expand Up @@ -205,7 +205,7 @@ func (inst *Installer) createPrivateDnsZone(ctx context.Context, zoneName string
return "", fmt.Errorf("failed to create virtual network links client: %w", err)
}

linkPoller, err := virtualNetworkLinksClient.BeginCreateOrUpdate(ctx, subnet.PrivateLinkResourceGroup, zoneName, fmt.Sprintf("%s-%s", subnet.ResourceGroup, subnet.VNetName),
linkPoller, err := virtualNetworkLinksClient.BeginCreateOrUpdate(ctx, resourceGroup, zoneName, fmt.Sprintf("%s-%s", subnet.ResourceGroup, subnet.VNetName),
armprivatedns.VirtualNetworkLink{
Location: Ptr("global"),
Properties: &armprivatedns.VirtualNetworkLinkProperties{
Expand Down Expand Up @@ -256,7 +256,7 @@ func (inst *Installer) createPrivateDnsZone(ctx context.Context, zoneName string
log.Ctx(ctx).Info().Msgf("Creating or updating virtual network link '%s' for DNS zone '%s'", linkName, zoneName)

additionalLinkPoller, err := virtualNetworkLinksClient.BeginCreateOrUpdate(ctx,
subnet.PrivateLinkResourceGroup, zoneName, linkName,
resourceGroup, zoneName, linkName,
armprivatedns.VirtualNetworkLink{
Location: Ptr("global"),
Properties: &armprivatedns.VirtualNetworkLinkProperties{
Expand All @@ -277,7 +277,7 @@ func (inst *Installer) createPrivateDnsZone(ctx context.Context, zoneName string
}

// Remove stale VNet links whose destination VNet ID is not in the expected set
linkPager := virtualNetworkLinksClient.NewListPager(subnet.PrivateLinkResourceGroup, zoneName, nil)
linkPager := virtualNetworkLinksClient.NewListPager(resourceGroup, zoneName, nil)
for linkPager.More() {
page, err := linkPager.NextPage(ctx)
if err != nil {
Expand All @@ -287,7 +287,7 @@ func (inst *Installer) createPrivateDnsZone(ctx context.Context, zoneName string
if link.Properties != nil && link.Properties.VirtualNetwork != nil && link.Properties.VirtualNetwork.ID != nil {
if !expectedVnetIDs[strings.ToLower(*link.Properties.VirtualNetwork.ID)] {
log.Ctx(ctx).Info().Msgf("Removing stale virtual network link '%s' (VNet '%s') from DNS zone '%s'", *link.Name, *link.Properties.VirtualNetwork.ID, zoneName)
deletePoller, err := virtualNetworkLinksClient.BeginDelete(ctx, subnet.PrivateLinkResourceGroup, zoneName, *link.Name, nil)
deletePoller, err := virtualNetworkLinksClient.BeginDelete(ctx, resourceGroup, zoneName, *link.Name, nil)
if err != nil {
return "", fmt.Errorf("failed to delete stale virtual network link '%s': %w", *link.Name, err)
}
Expand Down Expand Up @@ -377,7 +377,7 @@ func (inst *Installer) deleteOrgPrivateLinkResources(ctx context.Context, org *O
// Delete per-org private DNS zones: first remove VNet links, then delete the zone
for _, zoneName := range dnsZoneNames {
// Delete all VNet links in this zone first
linkPager := virtualNetworkLinksClient.NewListPager(configSubnet.PrivateLinkResourceGroup, zoneName, nil)
linkPager := virtualNetworkLinksClient.NewListPager(org.Cloud.ResourceGroup, zoneName, nil)
for linkPager.More() {
page, err := linkPager.NextPage(ctx)
if err != nil {
Expand All @@ -388,7 +388,7 @@ func (inst *Installer) deleteOrgPrivateLinkResources(ctx context.Context, org *O
}
for _, link := range page.Value {
log.Ctx(ctx).Info().Msgf("Deleting virtual network link '%s' from DNS zone '%s'", *link.Name, zoneName)
linkPoller, err := virtualNetworkLinksClient.BeginDelete(ctx, configSubnet.PrivateLinkResourceGroup, zoneName, *link.Name, nil)
linkPoller, err := virtualNetworkLinksClient.BeginDelete(ctx, org.Cloud.ResourceGroup, zoneName, *link.Name, nil)
if err != nil {
if isNotFoundError(err) {
continue
Expand All @@ -401,10 +401,10 @@ func (inst *Installer) deleteOrgPrivateLinkResources(ctx context.Context, org *O
}
}

log.Ctx(ctx).Info().Msgf("Deleting private DNS zone '%s' from '%s'", zoneName, configSubnet.PrivateLinkResourceGroup)
log.Ctx(ctx).Info().Msgf("Deleting private DNS zone '%s' from '%s'", zoneName, org.Cloud.ResourceGroup)
// Retry zone deletion on 409 Conflict — Azure may not have fully propagated VNet link deletions yet
for attempt := 0; ; attempt++ {
poller, err := privateDnsZoneClient.BeginDelete(ctx, configSubnet.PrivateLinkResourceGroup, zoneName, nil)
poller, err := privateDnsZoneClient.BeginDelete(ctx, org.Cloud.ResourceGroup, zoneName, nil)
if err != nil {
if isNotFoundError(err) {
break
Expand Down Expand Up @@ -432,8 +432,8 @@ func (inst *Installer) deleteOrgPrivateLinkResources(ctx context.Context, org *O

for _, name := range storageAccountNames {
peName := fmt.Sprintf("storage-%s-pe", name)
log.Ctx(ctx).Info().Msgf("Deleting private endpoint '%s' from '%s'", peName, configSubnet.PrivateLinkResourceGroup)
pePoller, err := privateEndpointClient.BeginDelete(ctx, configSubnet.PrivateLinkResourceGroup, peName, nil)
log.Ctx(ctx).Info().Msgf("Deleting private endpoint '%s' from '%s'", peName, org.Cloud.ResourceGroup)
pePoller, err := privateEndpointClient.BeginDelete(ctx, org.Cloud.ResourceGroup, peName, nil)
if err != nil {
if isNotFoundError(err) {
continue
Expand Down
63 changes: 63 additions & 0 deletions cli/internal/install/cloudinstall/preflight.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v7"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources/v3"
"github.com/IGLOU-EU/go-wildcard/v2"
"github.com/golang-jwt/jwt/v5"
Expand All @@ -27,6 +28,10 @@ func (inst *Installer) preflightCheck(ctx context.Context) error {
return err
}

if err := inst.checkNoLegacyPrivateLinkResourceGroups(ctx); err != nil {
return err
}

return nil
}

Expand Down Expand Up @@ -245,3 +250,61 @@ func checkAccess(ctx context.Context, scope, permission string, roleAssignments
// permissionMatches reports whether the required permission is matched by the
// actual permission. The actual permission is handed to wildcard.Match as its
// first argument and the required permission as its second.
// NOTE(review): presumably actual is a granted action that may contain
// wildcards and required is a concrete action name — confirm against callers.
func permissionMatches(required, actual string) bool {
	matched := wildcard.Match(actual, required)
	return matched
}

// checkNoLegacyPrivateLinkResourceGroups checks whether any legacy dedicated
// private-link resource groups (pattern "<rg>-privatelink-<subnetRG>-<vnet>")
// still exist. If they do, the deployment is aborted because private-link
// resources now live in the shared or per-organization resource groups.
//
// The check is skipped entirely when private networking is disabled, and
// clusters without an ExistingSubnet are ignored. For the first legacy
// resource group found, the returned error contains the `az` commands that
// delete the currently existing AKS clusters and the legacy resource group.
// It returns nil when no legacy resource group is found.
func (inst *Installer) checkNoLegacyPrivateLinkResourceGroups(ctx context.Context) error {
	if !inst.Config.Cloud.PrivateNetworking {
		return nil
	}

	rgClient, err := armresources.NewResourceGroupsClient(inst.Config.Cloud.SubscriptionID, inst.Credential, nil)
	if err != nil {
		return fmt.Errorf("failed to create resource groups client: %w", err)
	}

	for _, cluster := range inst.Config.Cloud.Compute.Clusters {
		if cluster.ExistingSubnet == nil {
			continue
		}

		legacyRG := fmt.Sprintf("%s-privatelink-%s-%s",
			inst.Config.Cloud.ResourceGroup,
			cluster.ExistingSubnet.ResourceGroup,
			cluster.ExistingSubnet.VNetName)

		// Only a successful Get counts as "the legacy group exists".
		// NOTE(review): every error (not just "not found") is treated as
		// "does not exist", so a failed lookup lets the preflight pass.
		if _, err := rgClient.Get(ctx, legacyRG, nil); err != nil {
			continue
		}

		// Write straight into the builder instead of formatting an
		// intermediate string first.
		var msg strings.Builder
		fmt.Fprintf(&msg,
			"legacy private-link resource group '%s' still exists. "+
				"Private-link resources are now placed in the shared and per-organization resource groups. "+
				"AKS does not allow changing the private DNS zone resource group on an existing cluster, "+
				"so the clusters must be deleted and re-created.\n\n"+
				"To proceed, run the following commands and then re-deploy:\n\n",
			legacyRG)

		aksClient, err := armcontainerservice.NewManagedClustersClient(inst.Config.Cloud.SubscriptionID, inst.Credential, nil)
		if err != nil {
			return fmt.Errorf("failed to create AKS client: %w", err)
		}

		// List a delete command only for clusters whose Get succeeds;
		// lookup errors are skipped without being reported.
		for _, c := range inst.Config.Cloud.Compute.Clusters {
			if _, err := aksClient.Get(ctx, inst.Config.Cloud.ResourceGroup, c.Name, nil); err != nil {
				continue
			}
			fmt.Fprintf(&msg, "  az aks delete -n %s -g %s --subscription %s --yes\n",
				c.Name, inst.Config.Cloud.ResourceGroup, inst.Config.Cloud.SubscriptionID)
		}

		fmt.Fprintf(&msg, "\n  az group delete -n %s --subscription %s --yes\n",
			legacyRG, inst.Config.Cloud.SubscriptionID)

		msg.WriteString("\nNote: this will cause a service outage until the re-deployment completes.")

		// Abort on the first legacy group found; the message is passed
		// through "%s" so it is never interpreted as a format string.
		return fmt.Errorf("%s", msg.String())
	}

	return nil
}
2 changes: 1 addition & 1 deletion cli/internal/install/cloudinstall/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (inst *Installer) CreateStorageAccount(ctx context.Context,

// Create private endpoint if private networking is enabled
if inst.Config.Cloud.PrivateNetworking {
if err := inst.createPrivateEndpointsForStorageAccount(ctx, &res.Account); err != nil {
if err := inst.createPrivateEndpointsForStorageAccount(ctx, resourceGroupName, &res.Account); err != nil {
return nil, fmt.Errorf("failed to create private endpoints for storage account '%s': %w", storageAccountConfig.Name, err)
}
}
Expand Down
Loading
Loading