From 2836b7762703e67bd41aa79ac69725e0a8f1a68c Mon Sep 17 00:00:00 2001 From: bmurray Date: Wed, 8 Mar 2023 15:19:23 -0800 Subject: [PATCH] Adding GCPMachinePool Functionality --- cloud/const.go | 25 + cloud/interfaces.go | 10 + cloud/scope/machinepool.go | 440 ++++++++++++++++++ .../services/compute/instancegroups/client.go | 139 ++++++ cloud/services/compute/instancegroups/doc.go | 18 + .../compute/instancegroups/instancegroups.go | 240 ++++++++++ ...ture.cluster.x-k8s.io_gcpmachinepools.yaml | 265 +++++++++++ config/crd/kustomization.yaml | 3 + config/rbac/role.yaml | 45 ++ exp/api/v1beta1/conditions_consts.go | 24 + exp/api/v1beta1/gcpmachinepool_types.go | 233 ++++++++++ exp/api/v1beta1/zz_generated.deepcopy.go | 228 +++++++++ exp/controllers/gcpmachinepool_controller.go | 270 +++++++++++ .../gcpmanagedmachinepool_controller.go | 19 +- exp/controllers/helpers.go | 112 +++++ main.go | 34 +- 16 files changed, 2081 insertions(+), 24 deletions(-) create mode 100644 cloud/const.go create mode 100644 cloud/scope/machinepool.go create mode 100644 cloud/services/compute/instancegroups/client.go create mode 100644 cloud/services/compute/instancegroups/doc.go create mode 100644 cloud/services/compute/instancegroups/instancegroups.go create mode 100644 config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml create mode 100644 exp/api/v1beta1/gcpmachinepool_types.go create mode 100644 exp/controllers/gcpmachinepool_controller.go create mode 100644 exp/controllers/helpers.go diff --git a/cloud/const.go b/cloud/const.go new file mode 100644 index 000000000..8234ee9b7 --- /dev/null +++ b/cloud/const.go @@ -0,0 +1,25 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cloud + +const ( + // CustomDataHashAnnotation is the key for the machine object annotation + // which tracks the hash of the custom data. + // See https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ + // for annotation formatting rules. + CustomDataHashAnnotation = "sigs.k8s.io/cluster-api-provider-gcp-mig-custom-data-hash" +) diff --git a/cloud/interfaces.go b/cloud/interfaces.go index 12dfe6f08..7de3ffd3f 100644 --- a/cloud/interfaces.go +++ b/cloud/interfaces.go @@ -105,3 +105,13 @@ type Machine interface { MachineGetter MachineSetter } + +// MachinePoolGetter is an interface which can get machine pool information. +type MachinePoolGetter interface { + Project() string +} + +// MachinePool is an interface which can get machine pool information. +type MachinePool interface { + MachinePoolGetter +} diff --git a/cloud/scope/machinepool.go b/cloud/scope/machinepool.go new file mode 100644 index 000000000..69fca7bbf --- /dev/null +++ b/cloud/scope/machinepool.go @@ -0,0 +1,440 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scope + +import ( + "context" + "crypto/sha256" + "encoding/json" + "fmt" + "os" + "path" + "sort" + "strings" + + "github.com/pkg/errors" + "golang.org/x/mod/semver" + "google.golang.org/api/compute/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-gcp/cloud" + infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + capierrors "sigs.k8s.io/cluster-api/errors" + clusterv1exp "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/patch" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +type ( + // MachinePoolScopeParams defines the input parameters used to create a new MachinePoolScope. + MachinePoolScopeParams struct { + Client client.Client + ClusterGetter cloud.ClusterGetter + MachinePool *clusterv1exp.MachinePool + GCPMachinePool *infrav1exp.GCPMachinePool + } + // MachinePoolScope defines a scope defined around a machine pool and its cluster. + MachinePoolScope struct { + Client client.Client + PatchHelper *patch.Helper + CapiMachinePoolPatchHelper *patch.Helper + + ClusterGetter cloud.ClusterGetter + MachinePool *clusterv1exp.MachinePool + GCPMachinePool *infrav1exp.GCPMachinePool + } +) + +// NewMachinePoolScope creates a new MachinePoolScope from the supplied parameters. +func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, error) { + if params.Client == nil { + return nil, errors.New("client is required when creating a MachinePoolScope") + } + if params.MachinePool == nil { + return nil, errors.New("machine pool is required when creating a MachinePoolScope") + } + if params.GCPMachinePool == nil { + return nil, errors.New("gcp machine pool is required when creating a MachinePoolScope") + } + + helper, err := patch.NewHelper(params.GCPMachinePool, params.Client) + if err != nil { + return nil, errors.Wrapf(err, "failed to init patch helper for %s %s/%s", params.GCPMachinePool.GroupVersionKind(), params.GCPMachinePool.Namespace, params.GCPMachinePool.Name) + } + + capiMachinePoolPatchHelper, err := patch.NewHelper(params.MachinePool, params.Client) + if err != nil { + return nil, errors.Wrapf(err, "failed to init patch helper for %s %s/%s", params.MachinePool.GroupVersionKind(), params.MachinePool.Namespace, params.MachinePool.Name) + } + + return &MachinePoolScope{ + Client: params.Client, + ClusterGetter: params.ClusterGetter, + MachinePool: params.MachinePool, + GCPMachinePool: params.GCPMachinePool, + PatchHelper: helper, + CapiMachinePoolPatchHelper: capiMachinePoolPatchHelper, + }, nil +} + +// SetReady sets the GCPMachinePool Ready Status to true. +func (m *MachinePoolScope) SetReady() { + m.GCPMachinePool.Status.Ready = true +} + +// SetNotReady sets the GCPMachinePool Ready Status to false. +func (m *MachinePoolScope) SetNotReady() { + m.GCPMachinePool.Status.Ready = false +} + +// SetFailureMessage sets the GCPMachinePool status failure message. +func (m *MachinePoolScope) SetFailureMessage(v error) { + m.GCPMachinePool.Status.FailureMessage = ptr.To(v.Error()) +} + +// SetFailureReason sets the GCPMachinePool status failure reason. +func (m *MachinePoolScope) SetFailureReason(v capierrors.MachineStatusError) { + m.GCPMachinePool.Status.FailureReason = &v +} + +// PatchObject persists the GCPMachinePool spec and status on the API server. +func (m *MachinePoolScope) PatchObject(ctx context.Context) error { + return m.PatchHelper.Patch( + ctx, + m.GCPMachinePool, + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.ReadyCondition, + infrav1exp.GCPMachinePoolReadyCondition, + infrav1exp.GCPMachinePoolCreatingCondition, + infrav1exp.GCPMachinePoolDeletingCondition, + }}, + ) +} + +// Close closes the current scope persisting the cluster configuration and status. +func (m *MachinePoolScope) Close(ctx context.Context) error { + if err := m.PatchObject(ctx); err != nil { + return err + } + if err := m.PatchCAPIMachinePoolObject(ctx); err != nil { + return errors.Wrap(err, "unable to patch CAPI MachinePool") + } + + return nil +} + +// InstanceGroupTemplateBuilder returns a GCP instance template. +func (m *MachinePoolScope) InstanceGroupTemplateBuilder(bootstrapData string) *compute.InstanceTemplate { + instanceTemplate := &compute.InstanceTemplate{ + Name: m.GCPMachinePool.Name, + Properties: &compute.InstanceProperties{ + MachineType: m.GCPMachinePool.Spec.InstanceType, + Tags: &compute.Tags{ + Items: append( + m.GCPMachinePool.Spec.AdditionalNetworkTags, + fmt.Sprintf("%s-%s", m.ClusterGetter.Name(), m.Role()), + m.ClusterGetter.Name(), + ), + }, + Labels: infrav1.Build(infrav1.BuildParams{ + ClusterName: m.ClusterGetter.Name(), + Lifecycle: infrav1.ResourceLifecycleOwned, + Role: ptr.To(m.Role()), + Additional: m.ClusterGetter.AdditionalLabels().AddLabels(m.GCPMachinePool.Spec.AdditionalLabels), + }), + Metadata: &compute.Metadata{ + Items: []*compute.MetadataItems{ + { + Key: "user-data", + Value: ptr.To(bootstrapData), + }, + }, + }, + }, + } + + instanceTemplate.Properties.Disks = append(instanceTemplate.Properties.Disks, m.InstanceImageSpec()) + instanceTemplate.Properties.Disks = append(instanceTemplate.Properties.Disks, m.InstanceAdditionalDiskSpec()...) + instanceTemplate.Properties.ServiceAccounts = append(instanceTemplate.Properties.ServiceAccounts, m.InstanceServiceAccountsSpec()) + instanceTemplate.Properties.NetworkInterfaces = append(instanceTemplate.Properties.NetworkInterfaces, m.InstanceNetworkInterfaceSpec()) + + return instanceTemplate +} + +// InstanceNetworkInterfaceSpec returns the network interface spec for the instance. +func (m *MachinePoolScope) InstanceNetworkInterfaceSpec() *compute.NetworkInterface { + networkInterface := &compute.NetworkInterface{ + Network: path.Join("projects", m.ClusterGetter.Project(), "global", "networks", m.ClusterGetter.NetworkName()), + } + + if m.GCPMachinePool.Spec.PublicIP != nil && *m.GCPMachinePool.Spec.PublicIP { + networkInterface.AccessConfigs = []*compute.AccessConfig{ + { + Type: "ONE_TO_ONE_NAT", + Name: "External NAT", + }, + } + } + + if m.GCPMachinePool.Spec.Subnet != nil { + networkInterface.Subnetwork = path.Join("regions", m.ClusterGetter.Region(), "subnetworks", *m.GCPMachinePool.Spec.Subnet) + } + + return networkInterface +} + +// InstanceAdditionalMetadataSpec returns the additional metadata for the instance. +func (m *MachinePoolScope) InstanceAdditionalMetadataSpec() *compute.MetadataItems { + metadataItems := new(compute.MetadataItems) + for _, additionalMetadata := range m.GCPMachinePool.Spec.AdditionalMetadata { + metadataItems = &compute.MetadataItems{ + Key: additionalMetadata.Key, + Value: additionalMetadata.Value, + } + } + return metadataItems +} + +// InstanceAdditionalDiskSpec returns the additional disks for the instance. +func (m *MachinePoolScope) InstanceAdditionalDiskSpec() []*compute.AttachedDisk { + additionalDisks := make([]*compute.AttachedDisk, 0, len(m.GCPMachinePool.Spec.AdditionalDisks)) + + for _, disk := range m.GCPMachinePool.Spec.AdditionalDisks { + additionalDisk := &compute.AttachedDisk{ + AutoDelete: true, + InitializeParams: &compute.AttachedDiskInitializeParams{ + DiskSizeGb: ptr.Deref(disk.Size, 30), + DiskType: string(*disk.DeviceType), + }, + } + if strings.HasSuffix(additionalDisk.InitializeParams.DiskType, string(infrav1.LocalSsdDiskType)) { + additionalDisk.Type = "SCRATCH" // Default is PERSISTENT. + // Override the Disk size + additionalDisk.InitializeParams.DiskSizeGb = 375 + // For local SSDs set interface to NVME (instead of default SCSI) which is faster. + // Most OS images would work with both NVME and SCSI disks but some may work + // considerably faster with NVME. + // https://cloud.google.com/compute/docs/disks/local-ssd#choose_an_interface + additionalDisk.Interface = "NVME" + } + additionalDisks = append(additionalDisks, additionalDisk) + } + return additionalDisks +} + +// InstanceImageSpec returns the image spec for the instance. +func (m *MachinePoolScope) InstanceImageSpec() *compute.AttachedDisk { + version := "" + if m.MachinePool.Spec.Template.Spec.Version != nil { + version = *m.MachinePool.Spec.Template.Spec.Version + } + image := "capi-ubuntu-1804-k8s-" + strings.ReplaceAll(semver.MajorMinor(version), ".", "-") + sourceImage := path.Join("projects", m.ClusterGetter.Project(), "global", "images", "family", image) + if m.GCPMachinePool.Spec.Image != nil { + sourceImage = *m.GCPMachinePool.Spec.Image + } else if m.GCPMachinePool.Spec.ImageFamily != nil { + sourceImage = *m.GCPMachinePool.Spec.ImageFamily + } + + diskType := infrav1exp.PdStandardDiskType + if t := m.GCPMachinePool.Spec.RootDeviceType; t != nil { + diskType = *t + } + + return &compute.AttachedDisk{ + AutoDelete: true, + Boot: true, + InitializeParams: &compute.AttachedDiskInitializeParams{ + DiskSizeGb: m.GCPMachinePool.Spec.RootDeviceSize, + DiskType: string(diskType), + SourceImage: sourceImage, + }, + } +} + +// Zone returns the zone for the machine pool. +func (m *MachinePoolScope) Zone() string { + if m.MachinePool.Spec.Template.Spec.FailureDomain == nil { + fd := m.ClusterGetter.FailureDomains() + if len(fd) == 0 { + return "" + } + zones := make([]string, 0, len(fd)) + for zone := range fd { + zones = append(zones, zone) + } + sort.Strings(zones) + return zones[0] + } + return *m.MachinePool.Spec.Template.Spec.FailureDomain +} + +// Role returns the machine role from the labels. +func (m *MachinePoolScope) Role() string { + return "node" +} + +// InstanceServiceAccountsSpec returns service-account spec. +func (m *MachinePoolScope) InstanceServiceAccountsSpec() *compute.ServiceAccount { + serviceAccount := &compute.ServiceAccount{ + Email: "default", + Scopes: []string{ + compute.CloudPlatformScope, + }, + } + + if m.GCPMachinePool.Spec.ServiceAccount != nil { + serviceAccount.Email = m.GCPMachinePool.Spec.ServiceAccount.Email + serviceAccount.Scopes = m.GCPMachinePool.Spec.ServiceAccount.Scopes + } + + return serviceAccount +} + +// InstanceGroupBuilder returns an instance group manager spec. +func (m *MachinePoolScope) InstanceGroupBuilder(instanceTemplateName string) *compute.InstanceGroupManager { + return &compute.InstanceGroupManager{ + Name: m.GCPMachinePool.Name, + BaseInstanceName: m.GCPMachinePool.Name, + InstanceTemplate: path.Join("projects", m.ClusterGetter.Project(), "global", "instanceTemplates", instanceTemplateName), + TargetSize: int64(*m.MachinePool.Spec.Replicas), + } +} + +// Project return the project for the GCPMachinePool's cluster. +func (m *MachinePoolScope) Project() string { + return m.ClusterGetter.Project() +} + +// GetGCPClientCredentials returns the GCP client credentials. +func (m *MachinePoolScope) GetGCPClientCredentials() ([]byte, error) { + credsPath := os.Getenv(ConfigFileEnvVar) + if credsPath == "" { + return nil, fmt.Errorf("no ADC environment variable found for credentials (expect %s)", ConfigFileEnvVar) + } + + byteValue, err := os.ReadFile(credsPath) //nolint:gosec // We need to read a file here + if err != nil { + return nil, fmt.Errorf("reading credentials from file %s: %w", credsPath, err) + } + return byteValue, nil +} + +// GetBootstrapData returns the bootstrap data from the secret in the Machine's bootstrap.dataSecretName. +func (m *MachinePoolScope) GetBootstrapData() (string, error) { + if m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { + return "", errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") + } + + secret := &corev1.Secret{} + key := types.NamespacedName{Namespace: m.Namespace(), Name: *m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName} + if err := m.Client.Get(context.TODO(), key, secret); err != nil { + return "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for GCPMachine %s/%s", m.Namespace(), m.Name()) + } + + value, ok := secret.Data["value"] + if !ok { + return "", errors.New("error retrieving bootstrap data: secret value key is missing") + } + + return string(value), nil +} + +// GetInstanceTemplateHash returns the hash of the instance template. The hash is used to identify the instance template. +func (m *MachinePoolScope) GetInstanceTemplateHash(instance *compute.InstanceTemplate) (string, error) { + instanceBytes, err := json.Marshal(instance) + if err != nil { + return "", err + } + + hash := sha256.Sum256(instanceBytes) + shortHash := hash[:4] + return fmt.Sprintf("%08x", shortHash), nil +} + +// SetAnnotation sets a key value annotation on the GCPMachinePool. +func (m *MachinePoolScope) SetAnnotation(key, value string) { + if m.GCPMachinePool.Annotations == nil { + m.GCPMachinePool.Annotations = map[string]string{} + } + m.GCPMachinePool.Annotations[key] = value +} + +// Namespace returns the GCPMachine namespace. +func (m *MachinePoolScope) Namespace() string { + return m.MachinePool.Namespace +} + +// Name returns the GCPMachine name. +func (m *MachinePoolScope) Name() string { + return m.GCPMachinePool.Name +} + +// HasReplicasExternallyManaged returns true if the machine pool has replicas externally managed. +func (m *MachinePoolScope) HasReplicasExternallyManaged(_ context.Context) bool { + return annotations.ReplicasManagedByExternalAutoscaler(m.MachinePool) +} + +// PatchCAPIMachinePoolObject persists the capi machinepool configuration and status. +func (m *MachinePoolScope) PatchCAPIMachinePoolObject(ctx context.Context) error { + return m.CapiMachinePoolPatchHelper.Patch( + ctx, + m.MachinePool, + ) +} + +// UpdateCAPIMachinePoolReplicas updates the associated MachinePool replica count. +func (m *MachinePoolScope) UpdateCAPIMachinePoolReplicas(_ context.Context, replicas *int32) { + m.MachinePool.Spec.Replicas = replicas +} + +// ReconcileReplicas ensures MachinePool replicas match MIG capacity if replicas are externally managed by an autoscaler. +func (m *MachinePoolScope) ReconcileReplicas(ctx context.Context, mig *compute.InstanceGroupManager) error { + log := log.FromContext(ctx) + + if !m.HasReplicasExternallyManaged(ctx) { + return nil + } + log.Info("Replicas are externally managed, skipping replica reconciliation", "machinepool", m.MachinePool.Name) + + var replicas int32 + if m.MachinePool.Spec.Replicas != nil { + replicas = *m.MachinePool.Spec.Replicas + } + + if capacity := int32(mig.TargetSize); capacity != replicas { + m.UpdateCAPIMachinePoolReplicas(ctx, &capacity) + } + + return nil +} + +// SetReplicas sets the machine pool replicas. +func (m *MachinePoolScope) SetReplicas(replicas int32) { + m.MachinePool.Spec.Replicas = &replicas +} + +// ConditionSetter returns the condition setter for the GCPMachinePool. +func (m *MachinePoolScope) ConditionSetter() conditions.Setter { + return m.GCPMachinePool +} diff --git a/cloud/services/compute/instancegroups/client.go b/cloud/services/compute/instancegroups/client.go new file mode 100644 index 000000000..8f9d67463 --- /dev/null +++ b/cloud/services/compute/instancegroups/client.go @@ -0,0 +1,139 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package instancegroups provides methods for managing GCP instance groups. +package instancegroups + +import ( + "context" + "fmt" + "time" + + "google.golang.org/api/compute/v1" + "google.golang.org/api/option" +) + +// Client wraps GCP SDK. +type Client interface { + // InstanceGroup Interfaces + GetInstanceGroup(ctx context.Context, project, zone, name string) (*compute.InstanceGroupManager, error) + CreateInstanceGroup(ctx context.Context, project, zone string, instanceGroup *compute.InstanceGroupManager) (*compute.Operation, error) + UpdateInstanceGroup(ctx context.Context, project, zone string, instanceGroup *compute.InstanceGroupManager) (*compute.Operation, error) + SetInstanceGroupTemplate(ctx context.Context, project, zone string, instanceGroup *compute.InstanceGroupManager) (*compute.Operation, error) + DeleteInstanceGroup(ctx context.Context, project, zone, name string) (*compute.Operation, error) + // InstanceGroupTemplate Interfaces + GetInstanceTemplate(ctx context.Context, project, name string) (*compute.InstanceTemplate, error) + CreateInstanceTemplate(ctx context.Context, project string, instanceTemplate *compute.InstanceTemplate) (*compute.Operation, error) + DeleteInstanceTemplate(ctx context.Context, project, name string) (*compute.Operation, error) + WaitUntilOperationCompleted(project, operation string) error + WaitUntilComputeOperationCompleted(project, zone, operation string) error +} + +type ( + // GCPClient contains the GCP SDK client. + GCPClient struct { + service *compute.Service + } +) + +var _ Client = &GCPClient{} + +// NewGCPClient creates a new GCP SDK client. +func NewGCPClient(ctx context.Context, creds []byte) *GCPClient { + service, err := compute.NewService(ctx, option.WithCredentialsJSON(creds)) + if err != nil { + return nil + } + return &GCPClient{service: service} +} + +// GetInstanceGroup returns a specific instance group in a project and zone. +func (c *GCPClient) GetInstanceGroup(_ context.Context, project, zone, name string) (*compute.InstanceGroupManager, error) { + return c.service.InstanceGroupManagers.Get(project, zone, name).Do() +} + +// CreateInstanceGroup creates a new instance group in a project and zone. +func (c *GCPClient) CreateInstanceGroup(_ context.Context, project, zone string, instanceGroup *compute.InstanceGroupManager) (*compute.Operation, error) { + return c.service.InstanceGroupManagers.Insert(project, zone, instanceGroup).Do() +} + +// UpdateInstanceGroup updates an instance group in a project and zone. +func (c *GCPClient) UpdateInstanceGroup(_ context.Context, project, zone string, instanceGroup *compute.InstanceGroupManager) (*compute.Operation, error) { + return c.service.InstanceGroupManagers.Patch(project, zone, instanceGroup.Name, instanceGroup).Do() +} + +// SetInstanceGroupTemplate sets an instance group template in a project and zone. +func (c *GCPClient) SetInstanceGroupTemplate(_ context.Context, project, zone string, instanceGroup *compute.InstanceGroupManager) (*compute.Operation, error) { + return c.service.InstanceGroupManagers.SetInstanceTemplate(project, zone, instanceGroup.Name, &compute.InstanceGroupManagersSetInstanceTemplateRequest{ + InstanceTemplate: instanceGroup.InstanceTemplate, + }).Do() +} + +// DeleteInstanceGroup deletes an instance group in a project and zone. +func (c *GCPClient) DeleteInstanceGroup(_ context.Context, project, zone, name string) (*compute.Operation, error) { + return c.service.InstanceGroupManagers.Delete(project, zone, name).Do() +} + +// GetInstanceTemplate returns a specific instance template in a project. +func (c *GCPClient) GetInstanceTemplate(_ context.Context, project, name string) (*compute.InstanceTemplate, error) { + return c.service.InstanceTemplates.Get(project, name).Do() +} + +// CreateInstanceTemplate creates a new instance template in a project. +func (c *GCPClient) CreateInstanceTemplate(_ context.Context, project string, instanceTemplate *compute.InstanceTemplate) (*compute.Operation, error) { + return c.service.InstanceTemplates.Insert(project, instanceTemplate).Do() +} + +// DeleteInstanceTemplate deletes an instance template in a project. +func (c *GCPClient) DeleteInstanceTemplate(_ context.Context, project, name string) (*compute.Operation, error) { + return c.service.InstanceTemplates.Delete(project, name).Do() +} + +// WaitUntilOperationCompleted waits for an operation to complete. +func (c *GCPClient) WaitUntilOperationCompleted(projectID, operationName string) error { + for { + operation, err := c.service.GlobalOperations.Get(projectID, operationName).Do() + if err != nil { + return err + } + if operation.Status == "DONE" { + if operation.Error != nil { + return fmt.Errorf("operation failed: %v", operation.Error.Errors) + } + return nil + } + // Wait 1s before checking again to avoid spamming the API. + time.Sleep(1 * time.Second) + } +} + +// WaitUntilComputeOperationCompleted waits for a compute operation to complete. +func (c *GCPClient) WaitUntilComputeOperationCompleted(project, zone, operationName string) error { + for { + operation, err := c.service.ZoneOperations.Get(project, zone, operationName).Do() + if err != nil { + return err + } + if operation.Status == "DONE" { + if operation.Error != nil { + return fmt.Errorf("operation failed: %v", operation.Error.Errors) + } + return nil + } + // Wait 1s before checking again to avoid spamming the API. + time.Sleep(1 * time.Second) + } +} diff --git a/cloud/services/compute/instancegroups/doc.go b/cloud/services/compute/instancegroups/doc.go new file mode 100644 index 000000000..73b00d67b --- /dev/null +++ b/cloud/services/compute/instancegroups/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package instancegroups provides methods for managing GCP instance groups. +package instancegroups diff --git a/cloud/services/compute/instancegroups/instancegroups.go b/cloud/services/compute/instancegroups/instancegroups.go new file mode 100644 index 000000000..04f56d119 --- /dev/null +++ b/cloud/services/compute/instancegroups/instancegroups.go @@ -0,0 +1,240 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package instancegroups provides methods for managing GCP instance groups. +package instancegroups + +import ( + "context" + "strings" + "time" + + "google.golang.org/api/compute/v1" + "sigs.k8s.io/cluster-api-provider-gcp/cloud" + "sigs.k8s.io/cluster-api-provider-gcp/cloud/gcperrors" + "sigs.k8s.io/cluster-api-provider-gcp/cloud/scope" + infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +type ( + // Service is a service for managing GCP instance groups. + Service struct { + scope *scope.MachinePoolScope + Client + } +) + +var _ cloud.ReconcilerWithResult = &Service{} + +// New creates a new instance group service. +func New(scope *scope.MachinePoolScope) *Service { + creds, err := scope.GetGCPClientCredentials() + if err != nil { + return nil + } + + return &Service{ + scope: scope, + Client: NewGCPClient(context.Background(), creds), + } +} + +// Reconcile gets/creates/updates a instance group. +func (s *Service) Reconcile(ctx context.Context) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + log.Info("Reconciling Instance Group") + + // Get the bootstrap data. + bootStrapToken, err := s.scope.GetBootstrapData() + if err != nil { + return ctrl.Result{}, err + } + // If the bootstrap data is empty, requeue. This is needed because the bootstrap data is not available until the bootstrap token is created. + if bootStrapToken == "" { + log.Info("Bootstrap token is empty, requeuing") + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + // Build the instance template based on the GCPMachinePool Spec and the bootstrap data. + instanceTemplate := s.scope.InstanceGroupTemplateBuilder(bootStrapToken) + instanceTemplateHash, err := s.scope.GetInstanceTemplateHash(instanceTemplate) + if err != nil { + return ctrl.Result{}, err + } + + // Create the instance template name. + instanceTemplateName := s.scope.GCPMachinePool.Name + "-" + instanceTemplateHash + + // Get the instance template if it exists. If it doesn't, create it. If it does, update it. + _, err = s.Client.GetInstanceTemplate(ctx, s.scope.Project(), instanceTemplateName) + switch { + case err != nil && !gcperrors.IsNotFound(err): + log.Error(err, "Error looking for instance template") + return ctrl.Result{}, err + case err != nil && gcperrors.IsNotFound(err): + log.Info("Instance template not found, creating") + err = s.createInstanceTemplate(ctx, instanceTemplateName, instanceTemplate) + if err != nil { + return ctrl.Result{}, err + } + } + + instanceGroup, err := s.Client.GetInstanceGroup(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.GCPMachinePool.Name) + switch { + case err != nil && !gcperrors.IsNotFound(err): + log.Error(err, "Error looking for instance group") + return ctrl.Result{}, err + case err != nil && gcperrors.IsNotFound(err): + log.Info("Instance group not found, creating") + err = s.createInstanceGroup(ctx, instanceTemplateName) + if err != nil { + return ctrl.Result{}, err + } + case err == nil: + log.Info("Instance group found, updating") + err = s.patchInstanceGroup(ctx, instanceTemplateName, instanceGroup) + if err != nil { + log.Error(err, "Error updating instance group") + return ctrl.Result{}, err + } + } + + // Re-get the instance group after updating it. This is needed to get the latest status. + instanceGroup, err = s.Client.GetInstanceGroup(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.GCPMachinePool.Name) + if err != nil { + log.Error(err, "Error getting instance group") + return ctrl.Result{}, err + } + + log.Info("Instance group updated", "instance group", instanceGroup.Name, "instance group status", instanceGroup.Status, "instance group target size", instanceGroup.TargetSize, "instance group current size", instanceGroup.TargetSize) + // Set the status. + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GCPMachinePoolUpdatingCondition, infrav1exp.GCPMachinePoolUpdatedReason, clusterv1.ConditionSeverityInfo, "") + s.scope.SetReplicas(int32(instanceGroup.TargetSize)) + s.scope.MachinePool.Status.Replicas = int32(instanceGroup.TargetSize) + s.scope.MachinePool.Status.ReadyReplicas = int32(instanceGroup.TargetSize) + s.scope.GCPMachinePool.Status.Ready = true + conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GCPMachinePoolReadyCondition) + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GCPMachinePoolCreatingCondition, infrav1exp.GCPMachinePoolUpdatedReason, clusterv1.ConditionSeverityInfo, "") + + return ctrl.Result{}, nil +} + +// createInstanceTemplate creates the instance template. +func (s *Service) createInstanceTemplate(ctx context.Context, instanceTemplateName string, instanceTemplate *compute.InstanceTemplate) error { + // Set the instance template name. This is used to identify the instance template. + instanceTemplate.Name = instanceTemplateName + + // Create the instance template in GCP. + instanceTemplateCreateOperation, err := s.Client.CreateInstanceTemplate(ctx, s.scope.Project(), instanceTemplate) + if err != nil { + return err + } + + // Wait for the instance group to be deleted + err = s.WaitUntilOperationCompleted(s.scope.Project(), instanceTemplateCreateOperation.Name) + if err != nil { + return err + } + + return nil +} + +// createInstanceGroup creates the instance group. +func (s *Service) createInstanceGroup(ctx context.Context, instanceTemplateName string) error { + // Create the instance group in GCP. + igCreationOperation, err := s.Client.CreateInstanceGroup(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.InstanceGroupBuilder(instanceTemplateName)) + if err != nil { + return err + } + + // Wait for the instance group to be deleted + err = s.WaitUntilComputeOperationCompleted(s.scope.Project(), s.scope.Zone(), igCreationOperation.Name) + if err != nil { + return err + } + + return nil +} + +// patchInstanceGroup patches the instance group. +func (s *Service) patchInstanceGroup(ctx context.Context, instanceTemplateName string, instanceGroup *compute.InstanceGroupManager) error { + log := log.FromContext(ctx) + + err := s.scope.ReconcileReplicas(ctx, instanceGroup) + if err != nil { + log.Error(err, "Error reconciling replicas") + return err + } + + lastSlashTemplateURI := strings.LastIndex(instanceGroup.InstanceTemplate, "/") + fetchedInstanceTemplateName := instanceGroup.InstanceTemplate[lastSlashTemplateURI+1:] + + // Check if instance group is already using the instance template. + if fetchedInstanceTemplateName != instanceTemplateName { + log.Info("Instance group is not using the instance template, setting instance template", "instance group", instanceGroup.InstanceTemplate, "instance template", instanceTemplateName) + // Set instance template. + _, err := s.Client.SetInstanceGroupTemplate(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.InstanceGroupBuilder(instanceTemplateName)) + if err != nil { + log.Error(err, "Error setting instance group template") + return err + } + } + + // If the instance group is already using the instance template, update the instance group. Otherwise, set the instance template. + if fetchedInstanceTemplateName == instanceTemplateName { + log.Info("Instance group is using the instance template, updating instance group") + instanceGroupUpdateOperation, err := s.Client.UpdateInstanceGroup(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.InstanceGroupBuilder(instanceTemplateName)) + if err != nil { + log.Error(err, "Error updating instance group") + return err + } + + err = s.WaitUntilComputeOperationCompleted(s.scope.Project(), s.scope.Zone(), instanceGroupUpdateOperation.Name) + if err != nil { + log.Error(err, "Error waiting for instance group update operation to complete") + return err + } + } + + return nil +} + +// Delete deletes the instance group. +func (s *Service) Delete(ctx context.Context) (ctrl.Result, error) { + log := log.FromContext(ctx) + + igDeletionOperation, err := s.DeleteInstanceGroup(ctx, s.scope.Project(), s.scope.GCPMachinePool.Spec.Zone, s.scope.GCPMachinePool.Name) + if err != nil { + if !gcperrors.IsNotFound(err) { + log.Error(err, "Error deleting instance group") + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + + // Wait for the instance group to be deleted + err = s.WaitUntilOperationCompleted(s.scope.Project(), igDeletionOperation.Name) + if err != nil { + log.Error(err, "Error waiting for instance group deletion operation to complete") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml new file mode 100644 index 000000000..3640dbfe8 --- /dev/null +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml @@ -0,0 +1,265 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.13.0 + name: gcpmachinepools.infrastructure.cluster.x-k8s.io +spec: + group: infrastructure.cluster.x-k8s.io + names: + kind: GCPMachinePool + listKind: GCPMachinePoolList + plural: gcpmachinepools + singular: gcpmachinepool + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Cluster to which this GCPMachine belongs + jsonPath: .metadata.labels.cluster\.x-k8s\.io/cluster-name + name: Cluster + type: string + - description: Machine ready status + jsonPath: .status.ready + name: Ready + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: GCPMachinePool is the Schema for the gcpmachinepools API and + represents a GCP Machine Pool. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: GCPMachinePoolSpec defines the desired state of GCPMachinePool + and the GCP instances that it will create. + properties: + additionalDisks: + description: AdditionalDisks are optional non-boot attached disks. + items: + description: AttachedDiskSpec degined GCP machine disk. + properties: + deviceType: + description: 'DeviceType is a device type of the attached disk. + Supported types of non-root attached volumes: 1. "pd-standard" + - Standard (HDD) persistent disk 2. "pd-ssd" - SSD persistent + disk 3. "local-ssd" - Local SSD disk (https://cloud.google.com/compute/docs/disks/local-ssd). + Default is "pd-standard".' + enum: + - pd-standard + - pd-ssd + - pd-balanced + type: string + size: + description: Size is the size of the disk in GBs. Defaults to + 30GB. For "local-ssd" size is always 375GB. + format: int64 + type: integer + type: object + type: array + additionalLabels: + additionalProperties: + type: string + description: AdditionalLabels is an optional set of tags to add to + an instance, in addition to the ones added by default by the GCP + provider. If both the GCPCluster and the GCPMachinePool specify + the same tag name with different values, the GCPMachinePool's value + takes precedence. + type: object + additionalMetadata: + description: AdditionalMetadata is an optional set of metadata to + add to an instance, in addition to the ones added by default by + the GCP provider. + items: + description: MetadataItem is a key/value pair to add to the instance's + metadata. + properties: + key: + description: Key is the identifier for the metadata entry. + type: string + value: + description: Value is the value of the metadata entry. + type: string + required: + - key + type: object + type: array + x-kubernetes-list-map-keys: + - key + x-kubernetes-list-type: map + additionalNetworkTags: + description: AdditionalNetworkTags is a list of network tags that + should be applied to the instance. These tags are set in addition + to any network tags defined at the cluster level or in the actuator. + items: + type: string + type: array + image: + description: Image is the full reference to a valid image to be used + for this machine. Takes precedence over ImageFamily. + type: string + imageFamily: + description: ImageFamily is the family of the image to be used for + this machine. + type: string + instanceType: + description: 'InstanceType is the type of instance to create. Example: + n1.standard-2' + type: string + location: + description: Location is the GCP region location ex us-central1 + type: string + network: + description: Network is the network to be used by machines in the + machine pool. + type: string + publicIP: + description: PublicIP specifies whether the instance should get a + public IP. Set this to true if you don't have a NAT instances or + Cloud Nat setup. + type: boolean + rootDeviceSize: + description: RootDeviceSize is the size of the root volume in GB. + Defaults to 30. + format: int64 + type: integer + rootDeviceType: + description: 'RootDeviceType is the type of the root volume. Supported + types of root volumes: 1. "pd-standard" - Standard (HDD) persistent + disk 2. "pd-ssd" - SSD persistent disk Default is "pd-standard".' + enum: + - pd-standard + - pd-ssd + - pd-balanced + type: string + serviceAccounts: + description: 'ServiceAccount specifies the service account email and + which scopes to assign to the machine. Defaults to: email: "default", + scope: []{compute.CloudPlatformScope}' + properties: + email: + description: 'Email: Email address of the service account.' + type: string + scopes: + description: 'Scopes: The list of scopes to be made available + for this service account.' + items: + type: string + type: array + type: object + subnet: + description: Subnet is a reference to the subnetwork to use for this + instance. If not specified, the first subnetwork retrieved from + the Cluster Region and Network is picked. + type: string + zone: + description: Zone is the GCP zone location ex us-central1-a + type: string + required: + - instanceType + - location + - network + - zone + type: object + status: + description: GCPMachinePoolStatus defines the observed state of GCPMachinePool + and the GCP instances that it manages. + properties: + conditions: + description: Conditions specifies the conditions for the managed machine + pool + items: + description: Condition defines an observation of a Cluster API resource + operational state. + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status + to another. This should be when the underlying condition changed. + If that is not known, then using the time when the API field + changed is acceptable. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. This field may be empty. + type: string + reason: + description: The reason for the condition's last transition + in CamelCase. The specific API may choose whether or not this + field is considered a guaranteed API. This field may not be + empty. + type: string + severity: + description: Severity provides an explicit classification of + Reason code, so the users or machines can immediately understand + the current situation and act accordingly. The Severity field + MUST be set only when Status=False. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition in CamelCase or in foo.example.com/CamelCase. + Many .condition.type values are consistent across resources + like Available, but because arbitrary conditions can be useful + (see .node.status.conditions), the ability to deconflict is + important. + type: string + required: + - lastTransitionTime + - status + - type + type: object + type: array + failureMessage: + description: "FailureMessage will be set in the event that there is + a terminal problem reconciling the MachinePool and will contain + a more verbose string suitable for logging and human consumption. + \n This field should not be set for transitive errors that a controller + faces that are expected to be fixed automatically over time (like + service outages), but instead indicate that something is fundamentally + wrong with the MachinePool's spec or the configuration of the controller, + and that manual intervention is required. Examples of terminal errors + would be invalid combinations of settings in the spec, values that + are unsupported by the controller, or the responsible controller + itself being critically misconfigured. \n Any transient errors that + occur during the reconciliation of MachinePools can be added as + events to the MachinePool object and/or logged in the controller's + output." + type: string + failureReason: + description: "FailureReason will be set in the event that there is + a terminal problem reconciling the MachinePool and will contain + a succinct value suitable for machine interpretation. \n This field + should not be set for transitive errors that a controller faces + that are expected to be fixed automatically over time (like service + outages), but instead indicate that something is fundamentally wrong + with the MachinePool's spec or the configuration of the controller, + and that manual intervention is required. Examples of terminal errors + would be invalid combinations of settings in the spec, values that + are unsupported by the controller, or the responsible controller + itself being critically misconfigured. \n Any transient errors that + occur during the reconciliation of MachinePools can be added as + events to the MachinePool object and/or logged in the controller's + output." + type: string + ready: + description: Ready is true when the provider resource is ready. + type: boolean + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 874c9179e..614b02be6 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -13,6 +13,7 @@ resources: - bases/infrastructure.cluster.x-k8s.io_gcpmanagedcontrolplanes.yaml - bases/infrastructure.cluster.x-k8s.io_gcpmanagedmachinepools.yaml +- bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml # +kubebuilder:scaffold:crdkustomizeresource patchesStrategicMerge: @@ -22,6 +23,7 @@ patchesStrategicMerge: - patches/webhook_in_gcpclusters.yaml - patches/webhook_in_gcpmachinetemplates.yaml - patches/webhook_in_gcpclustertemplates.yaml +#- patches/webhook_in_gcpmachinepools.yaml # +kubebuilder:scaffold:crdkustomizewebhookpatch # [CERTMANAGER] To enable webhook, uncomment all the sections with [CERTMANAGER] prefix. @@ -30,6 +32,7 @@ patchesStrategicMerge: - patches/cainjection_in_gcpclusters.yaml - patches/cainjection_in_gcpmachinetemplates.yaml - patches/cainjection_in_gcpclustertemplates.yaml +#- patches/cainjection_in_gcpmachinepools.yaml # +kubebuilder:scaffold:crdkustomizecainjectionpatch # the following config is for teaching kustomize how to do kustomization for CRDs. diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 2669d0890..a4559e62d 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -39,6 +39,15 @@ rules: - subjectaccessreviews verbs: - create +- apiGroups: + - bootstrap.cluster.x-k8s.io + resources: + - kubeadmconfigs + - kubeadmconfigs/status + verbs: + - get + - list + - watch - apiGroups: - cluster.x-k8s.io resources: @@ -56,6 +65,8 @@ rules: verbs: - get - list + - patch + - update - watch - apiGroups: - cluster.x-k8s.io @@ -66,6 +77,14 @@ rules: - get - list - watch +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch - apiGroups: - infrastructure.cluster.x-k8s.io resources: @@ -86,6 +105,32 @@ rules: - get - patch - update +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - gcpmachinepools + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - gcpmachinepools/finalizers + verbs: + - update +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - gcpmachinepools/status + verbs: + - get + - patch + - update - apiGroups: - infrastructure.cluster.x-k8s.io resources: diff --git a/exp/api/v1beta1/conditions_consts.go b/exp/api/v1beta1/conditions_consts.go index 2214f2897..29ee60219 100644 --- a/exp/api/v1beta1/conditions_consts.go +++ b/exp/api/v1beta1/conditions_consts.go @@ -70,4 +70,28 @@ const ( GKEMachinePoolErrorReason = "GKEMachinePoolError" // GKEMachinePoolReconciliationFailedReason used to report failures while reconciling GKE node pool. GKEMachinePoolReconciliationFailedReason = "GKEMachinePoolReconciliationFailed" + + // GCPMachinePoolReadyCondition condition reports on the successful reconciliation of GCP machine pool. + GCPMachinePoolReadyCondition clusterv1.ConditionType = "GCPMachinePoolReady" + // GCPMachinePoolCreatingCondition condition reports on whether the GCP machine pool is creating. + GCPMachinePoolCreatingCondition clusterv1.ConditionType = "GCPMachinePoolCreating" + // GCPMachinePoolUpdatingCondition condition reports on whether the GCP machine pool is updating. + GCPMachinePoolUpdatingCondition clusterv1.ConditionType = "GCPMachinePoolUpdating" + // GCPMachinePoolDeletingCondition condition reports on whether the GCP machine pool is deleting. + GCPMachinePoolDeletingCondition clusterv1.ConditionType = "GCPMachinePoolDeleting" + // GCPMachinePoolDeletedReason used to report GCP machine pool is deleted. + GCPMachinePoolDeletedReason = "GCPMachinePoolDeleted" + + // GCPMachinePoolCreatingReason used to report GCP machine pool being created. + GCPMachinePoolCreatingReason = "GCPMachinePoolCreating" + // GCPMachinePoolCreatedReason used to report GCP machine pool is created. + GCPMachinePoolCreatedReason = "GCPMachinePoolCreated" + // GCPMachinePoolUpdatedReason used to report GCP machine pool is updated. + GCPMachinePoolUpdatedReason = "GCPMachinePoolUpdated" + // GCPMachinePoolDeletingReason used to report GCP machine pool being deleted. + GCPMachinePoolDeletingReason = "GCPMachinePoolDeleting" + // GCPMachinePoolErrorReason used to report GCP machine pool is in error state. + GCPMachinePoolErrorReason = "GCPMachinePoolError" + // GCPMachinePoolReconciliationFailedReason used to report failures while reconciling GCP machine pool. + GCPMachinePoolReconciliationFailedReason = "GCPMachinePoolReconciliationFailed" ) diff --git a/exp/api/v1beta1/gcpmachinepool_types.go b/exp/api/v1beta1/gcpmachinepool_types.go new file mode 100644 index 000000000..1111a0435 --- /dev/null +++ b/exp/api/v1beta1/gcpmachinepool_types.go @@ -0,0 +1,233 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/errors" +) + +const ( + // MachinePoolFinalizer allows ReconcileGCPMachinePool to clean up GCP resources associated with GCPMachinePool before removing it from the apiserver. + MachinePoolFinalizer = "gcpmachinepool.infrastructure.cluster.x-k8s.io" +) + +const ( + // PdStandardDiskType defines the name for the standard disk. + PdStandardDiskType DiskType = "pd-standard" + // PdSsdDiskType defines the name for the ssd disk. + PdSsdDiskType DiskType = "pd-ssd" + // LocalSsdDiskType defines the name for the local ssd disk. + LocalSsdDiskType DiskType = "local-ssd" +) + +// AttachedDiskSpec degined GCP machine disk. +type AttachedDiskSpec struct { + // DeviceType is a device type of the attached disk. + // Supported types of non-root attached volumes: + // 1. "pd-standard" - Standard (HDD) persistent disk + // 2. "pd-ssd" - SSD persistent disk + // 3. "local-ssd" - Local SSD disk (https://cloud.google.com/compute/docs/disks/local-ssd). + // Default is "pd-standard". + // +optional + DeviceType *DiskType `json:"deviceType,omitempty"` + // Size is the size of the disk in GBs. + // Defaults to 30GB. For "local-ssd" size is always 375GB. + // +optional + Size *int64 `json:"size,omitempty"` +} + +// ServiceAccount describes compute.serviceAccount. +type ServiceAccount struct { + // Email: Email address of the service account. + Email string `json:"email,omitempty"` + + // Scopes: The list of scopes to be made available for this service + // account. + Scopes []string `json:"scopes,omitempty"` +} + +// MetadataItem is a key/value pair to add to the instance's metadata. +type MetadataItem struct { + // Key is the identifier for the metadata entry. + Key string `json:"key"` + // Value is the value of the metadata entry. + Value *string `json:"value,omitempty"` +} + +// GCPMachinePoolSpec defines the desired state of GCPMachinePool and the GCP instances that it will create. +type GCPMachinePoolSpec struct { + + // AdditionalDisks are optional non-boot attached disks. + // +optional + AdditionalDisks []AttachedDiskSpec `json:"additionalDisks,omitempty"` + + // AdditionalNetworkTags is a list of network tags that should be applied to the + // instance. These tags are set in addition to any network tags defined + // at the cluster level or in the actuator. + // +optional + AdditionalNetworkTags []string `json:"additionalNetworkTags,omitempty"` + + // AdditionalMetadata is an optional set of metadata to add to an instance, in addition to the ones added by default by the + // GCP provider. + // +listType=map + // +listMapKey=key + // +optional + AdditionalMetadata []MetadataItem `json:"additionalMetadata,omitempty"` + + // AdditionalLabels is an optional set of tags to add to an instance, in addition to the ones added by default by the + // GCP provider. If both the GCPCluster and the GCPMachinePool specify the same tag name with different values, the + // GCPMachinePool's value takes precedence. + // +optional + AdditionalLabels infrav1.Labels `json:"additionalLabels,omitempty"` + + // InstanceType is the type of instance to create. Example: n1.standard-2 + InstanceType string `json:"instanceType"` + + // Image is the full reference to a valid image to be used for this machine. + // Takes precedence over ImageFamily. + // +optional + Image *string `json:"image,omitempty"` + + // ImageFamily is the family of the image to be used for this machine. + // +optional + ImageFamily *string `json:"imageFamily,omitempty"` + + // Location is the GCP region location ex us-central1 + Location string `json:"location"` + + // Network is the network to be used by machines in the machine pool. + Network string `json:"network"` + + // PublicIP specifies whether the instance should get a public IP. + // Set this to true if you don't have a NAT instances or Cloud Nat setup. + // +optional + PublicIP *bool `json:"publicIP,omitempty"` + + // RootDeviceSize is the size of the root volume in GB. + // Defaults to 30. + // +optional + RootDeviceSize int64 `json:"rootDeviceSize,omitempty"` + + // RootDeviceType is the type of the root volume. + // Supported types of root volumes: + // 1. "pd-standard" - Standard (HDD) persistent disk + // 2. "pd-ssd" - SSD persistent disk + // Default is "pd-standard". + // +optional + RootDeviceType *DiskType `json:"rootDeviceType,omitempty"` + + // ServiceAccount specifies the service account email and which scopes to assign to the machine. + // Defaults to: email: "default", scope: []{compute.CloudPlatformScope} + // +optional + ServiceAccount *ServiceAccount `json:"serviceAccounts,omitempty"` + + // Subnet is a reference to the subnetwork to use for this instance. If not specified, + // the first subnetwork retrieved from the Cluster Region and Network is picked. + // +optional + Subnet *string `json:"subnet,omitempty"` + + // Zone is the GCP zone location ex us-central1-a + Zone string `json:"zone"` +} + +// GCPMachinePoolStatus defines the observed state of GCPMachinePool and the GCP instances that it manages. +type GCPMachinePoolStatus struct { + + // Ready is true when the provider resource is ready. + // +optional + Ready bool `json:"ready"` + + // FailureReason will be set in the event that there is a terminal problem + // reconciling the MachinePool and will contain a succinct value suitable + // for machine interpretation. + // + // This field should not be set for transitive errors that a controller + // faces that are expected to be fixed automatically over + // time (like service outages), but instead indicate that something is + // fundamentally wrong with the MachinePool's spec or the configuration of + // the controller, and that manual intervention is required. Examples + // of terminal errors would be invalid combinations of settings in the + // spec, values that are unsupported by the controller, or the + // responsible controller itself being critically misconfigured. + // + // Any transient errors that occur during the reconciliation of MachinePools + // can be added as events to the MachinePool object and/or logged in the + // controller's output. + // +optional + FailureReason *errors.MachineStatusError `json:"failureReason,omitempty"` + + // FailureMessage will be set in the event that there is a terminal problem + // reconciling the MachinePool and will contain a more verbose string suitable + // for logging and human consumption. + // + // This field should not be set for transitive errors that a controller + // faces that are expected to be fixed automatically over + // time (like service outages), but instead indicate that something is + // fundamentally wrong with the MachinePool's spec or the configuration of + // the controller, and that manual intervention is required. Examples + // of terminal errors would be invalid combinations of settings in the + // spec, values that are unsupported by the controller, or the + // responsible controller itself being critically misconfigured. + // + // Any transient errors that occur during the reconciliation of MachinePools + // can be added as events to the MachinePool object and/or logged in the + // controller's output. + // +optional + FailureMessage *string `json:"failureMessage,omitempty"` + + // Conditions specifies the conditions for the managed machine pool + Conditions clusterv1.Conditions `json:"conditions,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Cluster",type="string",JSONPath=".metadata.labels.cluster\\.x-k8s\\.io/cluster-name",description="Cluster to which this GCPMachine belongs" +// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.ready",description="Machine ready status" + +// GCPMachinePool is the Schema for the gcpmachinepools API and represents a GCP Machine Pool. +type GCPMachinePool struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec GCPMachinePoolSpec `json:"spec,omitempty"` + Status GCPMachinePoolStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// GCPMachinePoolList contains a list of GCPMachinePool resources. +type GCPMachinePoolList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []GCPMachinePool `json:"items"` +} + +// GetConditions returns the conditions for the GCPManagedMachinePool. +func (r *GCPMachinePool) GetConditions() clusterv1.Conditions { + return r.Status.Conditions +} + +// SetConditions sets the status conditions for the GCPManagedMachinePool. +func (r *GCPMachinePool) SetConditions(conditions clusterv1.Conditions) { + r.Status.Conditions = conditions +} +func init() { + infrav1.SchemeBuilder.Register(&GCPMachinePool{}, &GCPMachinePoolList{}) +} diff --git a/exp/api/v1beta1/zz_generated.deepcopy.go b/exp/api/v1beta1/zz_generated.deepcopy.go index c410dbaeb..863f1212b 100644 --- a/exp/api/v1beta1/zz_generated.deepcopy.go +++ b/exp/api/v1beta1/zz_generated.deepcopy.go @@ -24,8 +24,196 @@ import ( "k8s.io/apimachinery/pkg/runtime" apiv1beta1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1" cluster_apiapiv1beta1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/errors" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AttachedDiskSpec) DeepCopyInto(out *AttachedDiskSpec) { + *out = *in + if in.DeviceType != nil { + in, out := &in.DeviceType, &out.DeviceType + *out = new(DiskType) + **out = **in + } + if in.Size != nil { + in, out := &in.Size, &out.Size + *out = new(int64) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AttachedDiskSpec. +func (in *AttachedDiskSpec) DeepCopy() *AttachedDiskSpec { + if in == nil { + return nil + } + out := new(AttachedDiskSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GCPMachinePool) DeepCopyInto(out *GCPMachinePool) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePool. +func (in *GCPMachinePool) DeepCopy() *GCPMachinePool { + if in == nil { + return nil + } + out := new(GCPMachinePool) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *GCPMachinePool) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GCPMachinePoolList) DeepCopyInto(out *GCPMachinePoolList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]GCPMachinePool, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolList. +func (in *GCPMachinePoolList) DeepCopy() *GCPMachinePoolList { + if in == nil { + return nil + } + out := new(GCPMachinePoolList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *GCPMachinePoolList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GCPMachinePoolSpec) DeepCopyInto(out *GCPMachinePoolSpec) { + *out = *in + if in.AdditionalDisks != nil { + in, out := &in.AdditionalDisks, &out.AdditionalDisks + *out = make([]AttachedDiskSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.AdditionalNetworkTags != nil { + in, out := &in.AdditionalNetworkTags, &out.AdditionalNetworkTags + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.AdditionalMetadata != nil { + in, out := &in.AdditionalMetadata, &out.AdditionalMetadata + *out = make([]MetadataItem, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.AdditionalLabels != nil { + in, out := &in.AdditionalLabels, &out.AdditionalLabels + *out = make(apiv1beta1.Labels, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Image != nil { + in, out := &in.Image, &out.Image + *out = new(string) + **out = **in + } + if in.ImageFamily != nil { + in, out := &in.ImageFamily, &out.ImageFamily + *out = new(string) + **out = **in + } + if in.PublicIP != nil { + in, out := &in.PublicIP, &out.PublicIP + *out = new(bool) + **out = **in + } + if in.RootDeviceType != nil { + in, out := &in.RootDeviceType, &out.RootDeviceType + *out = new(DiskType) + **out = **in + } + if in.ServiceAccount != nil { + in, out := &in.ServiceAccount, &out.ServiceAccount + *out = new(ServiceAccount) + (*in).DeepCopyInto(*out) + } + if in.Subnet != nil { + in, out := &in.Subnet, &out.Subnet + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolSpec. +func (in *GCPMachinePoolSpec) DeepCopy() *GCPMachinePoolSpec { + if in == nil { + return nil + } + out := new(GCPMachinePoolSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GCPMachinePoolStatus) DeepCopyInto(out *GCPMachinePoolStatus) { + *out = *in + if in.FailureReason != nil { + in, out := &in.FailureReason, &out.FailureReason + *out = new(errors.MachineStatusError) + **out = **in + } + if in.FailureMessage != nil { + in, out := &in.FailureMessage, &out.FailureMessage + *out = new(string) + **out = **in + } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make(cluster_apiapiv1beta1.Conditions, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolStatus. +func (in *GCPMachinePoolStatus) DeepCopy() *GCPMachinePoolStatus { + if in == nil { + return nil + } + out := new(GCPMachinePoolStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GCPManagedCluster) DeepCopyInto(out *GCPManagedCluster) { *out = *in @@ -514,6 +702,26 @@ func (in *MasterAuthorizedNetworksConfigCidrBlock) DeepCopy() *MasterAuthorizedN return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetadataItem) DeepCopyInto(out *MetadataItem) { + *out = *in + if in.Value != nil { + in, out := &in.Value, &out.Value + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetadataItem. +func (in *MetadataItem) DeepCopy() *MetadataItem { + if in == nil { + return nil + } + out := new(MetadataItem) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NodeNetworkConfig) DeepCopyInto(out *NodeNetworkConfig) { *out = *in @@ -630,6 +838,26 @@ func (in *NodeSecurityConfig) DeepCopy() *NodeSecurityConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceAccount) DeepCopyInto(out *ServiceAccount) { + *out = *in + if in.Scopes != nil { + in, out := &in.Scopes, &out.Scopes + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAccount. +func (in *ServiceAccount) DeepCopy() *ServiceAccount { + if in == nil { + return nil + } + out := new(ServiceAccount) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ServiceAccountConfig) DeepCopyInto(out *ServiceAccountConfig) { *out = *in diff --git a/exp/controllers/gcpmachinepool_controller.go b/exp/controllers/gcpmachinepool_controller.go new file mode 100644 index 000000000..058454f59 --- /dev/null +++ b/exp/controllers/gcpmachinepool_controller.go @@ -0,0 +1,270 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "time" + + "github.com/googleapis/gax-go/v2/apierror" + "github.com/pkg/errors" + "google.golang.org/grpc/codes" + apierrors "k8s.io/apimachinery/pkg/api/errors" + infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-gcp/cloud" + "sigs.k8s.io/cluster-api-provider-gcp/cloud/scope" + "sigs.k8s.io/cluster-api-provider-gcp/cloud/services/compute/instancegroups" + infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-gcp/util/reconciler" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" + expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/cluster-api/util/predicates" + "sigs.k8s.io/cluster-api/util/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" +) + +// GCPMachinePoolReconciler reconciles a GCPMachinePool object and the corresponding MachinePool object. +type GCPMachinePoolReconciler struct { + client.Client + ReconcileTimeout time.Duration + WatchFilterValue string +} + +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepools,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepools/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepools/finalizers,verbs=update +// +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=kubeadmconfigs;kubeadmconfigs/status,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;update;patch +// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch +// +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch +// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch + +// SetupWithManager sets up the controller with the Manager. +func (r *GCPMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { + log := log.FromContext(ctx).WithValues("controller", "GCPMachinePool") + + gvk, err := apiutil.GVKForObject(new(infrav1exp.GCPMachinePool), mgr.GetScheme()) + if err != nil { + return errors.Wrapf(err, "failed to find GVK for GCPMachinePool") + } + + c, err := ctrl.NewControllerManagedBy(mgr). + WithOptions(options). + For(&infrav1exp.GCPMachinePool{}). + WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(log, r.WatchFilterValue)). + Watches( + &expclusterv1.MachinePool{}, + handler.EnqueueRequestsFromMapFunc(machinePoolToInfrastructureMapFunc(gvk)), + ). + // watch for changes in KubeadmConfig to sync bootstrap token + Watches( + &kubeadmv1.KubeadmConfig{}, + handler.EnqueueRequestsFromMapFunc(KubeadmConfigToInfrastructureMapFunc(ctx, r.Client, log)), + builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}), + ). + Build(r) + if err != nil { + return errors.Wrapf(err, "error creating controller") + } + + // Add a watch on clusterv1.Cluster object for unpause & ready notifications. + if err := c.Watch( + source.Kind(mgr.GetCache(), &clusterv1.Cluster{}), + handler.EnqueueRequestsFromMapFunc(util.ClusterToInfrastructureMapFunc(ctx, gvk, mgr.GetClient(), &infrav1exp.GCPMachinePool{})), + predicates.ClusterUnpausedAndInfrastructureReady(log), + ); err != nil { + return errors.Wrap(err, "failed adding a watch for ready clusters") + } + + return nil +} + +// Reconcile handles GCPMachinePool events and reconciles the corresponding MachinePool. +func (r *GCPMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { + ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultedLoopTimeout(r.ReconcileTimeout)) + + defer cancel() + + log := ctrl.LoggerFrom(ctx) + + // Fetch the GCPMachinePool instance. + gcpMachinePool := &infrav1exp.GCPMachinePool{} + if err := r.Get(ctx, req.NamespacedName, gcpMachinePool); err != nil { + if apierrors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{Requeue: true}, nil + } + + // Get the MachinePool. + machinePool, err := GetOwnerMachinePool(ctx, r.Client, gcpMachinePool.ObjectMeta) + if err != nil { + log.Error(err, "Failed to retrieve owner MachinePool from the API Server") + return ctrl.Result{}, err + } + if machinePool == nil { + log.Info("Waiting for MachinePool Controller to set OwnerRef on GCPMachinePool") + return ctrl.Result{}, nil + } + + // Get the Cluster. + cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machinePool.ObjectMeta) + if err != nil { + log.Error(err, "Failed to retrieve owner Cluster from the API Server") + return ctrl.Result{}, err + } + if annotations.IsPaused(cluster, gcpMachinePool) { + log.Info("GCPMachinePool or linked Cluster is marked as paused. Won't reconcile") + return ctrl.Result{}, nil + } + + log = log.WithValues("cluster", cluster.Name) + gcpClusterName := client.ObjectKey{ + Namespace: gcpMachinePool.Namespace, + Name: cluster.Spec.InfrastructureRef.Name, + } + gcpCluster := &infrav1.GCPCluster{} + if err := r.Client.Get(ctx, gcpClusterName, gcpCluster); err != nil { + log.Info("GCPCluster is not available yet") + return ctrl.Result{}, err + } + + // Create the cluster scope + clusterScope, err := scope.NewClusterScope(ctx, scope.ClusterScopeParams{ + Client: r.Client, + Cluster: cluster, + GCPCluster: gcpCluster, + }) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to create scope") + } + + // Create the machine pool scope + machinePoolScope, err := scope.NewMachinePoolScope(scope.MachinePoolScopeParams{ + Client: r.Client, + MachinePool: machinePool, + GCPMachinePool: gcpMachinePool, + ClusterGetter: clusterScope, + }) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to create scope") + } + + // Make sure bootstrap data is available and populated. + if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { + log.Info("Bootstrap data secret reference is not yet available") + return reconcile.Result{}, nil + } + + defer func() { + if err := machinePoolScope.Close(ctx); err != nil && reterr == nil { + reterr = err + } + }() + + // Handle deleted machine pools + if !gcpMachinePool.DeletionTimestamp.IsZero() { + return r.reconcileDelete(ctx, machinePoolScope) + } + + // Handle non-deleted machine pools + return r.reconcileNormal(ctx, machinePoolScope) +} + +// reconcileNormal handles non-deleted GCPMachinePools. +func (r *GCPMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + log.Info("Reconciling GCPMachinePool") + + // If the GCPMachinePool has a status failure reason, return early. This is to avoid attempting to do anything to the GCPMachinePool if there is a known problem. + if machinePoolScope.GCPMachinePool.Status.FailureReason != nil || machinePoolScope.GCPMachinePool.Status.FailureMessage != nil { + log.Info("Found failure reason or message, returning early") + return ctrl.Result{}, nil + } + + // If the GCPMachinePool doesn't have our finalizer, add it. + controllerutil.AddFinalizer(machinePoolScope.GCPMachinePool, infrav1exp.MachinePoolFinalizer) + if err := machinePoolScope.PatchObject(ctx); err != nil { + return ctrl.Result{}, err + } + + reconcilers := []cloud.ReconcilerWithResult{ + instancegroups.New(machinePoolScope), + } + + for _, r := range reconcilers { + res, err := r.Reconcile(ctx) + if err != nil { + var e *apierror.APIError + if ok := errors.As(err, &e); ok { + if e.GRPCStatus().Code() == codes.FailedPrecondition { + log.Info("GCP API returned a failed precondition error, retrying") + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + } + log.Error(err, "Failed to reconcile GCPMachinePool") + record.Warnf(machinePoolScope.GCPMachinePool, "FailedReconcile", "Failed to reconcile GCPMachinePool: %v", err) + return ctrl.Result{}, err + } + if res.Requeue { + return res, nil + } + } + + return ctrl.Result{}, nil +} + +// reconcileDelete handles deleted GCPMachinePools. +func (r *GCPMachinePoolReconciler) reconcileDelete(ctx context.Context, machinePoolScope *scope.MachinePoolScope) (ctrl.Result, error) { + log := log.FromContext(ctx) + log.Info("Reconciling GCPMachinePool delete") + + reconcilers := []cloud.ReconcilerWithResult{ + instancegroups.New(machinePoolScope), + } + + for _, r := range reconcilers { + res, err := r.Delete(ctx) + if err != nil { + log.Error(err, "Failed to reconcile GCPMachinePool") + record.Warnf(machinePoolScope.GCPMachinePool, "FailedReconcile", "Failed to reconcile GCPMachinePool: %v", err) + return ctrl.Result{}, err + } + if res.Requeue { + return res, nil + } + } + + // Remove the finalizer + + controllerutil.RemoveFinalizer(machinePoolScope.GCPMachinePool, infrav1exp.MachinePoolFinalizer) + + return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil +} diff --git a/exp/controllers/gcpmanagedmachinepool_controller.go b/exp/controllers/gcpmanagedmachinepool_controller.go index 47e2b81d4..d3c642ecb 100644 --- a/exp/controllers/gcpmanagedmachinepool_controller.go +++ b/exp/controllers/gcpmanagedmachinepool_controller.go @@ -198,23 +198,6 @@ func getMachinePoolByName(ctx context.Context, c client.Client, namespace, name return m, nil } -// getOwnerMachinePool returns the MachinePool object owning the current resource. -func getOwnerMachinePool(ctx context.Context, c client.Client, obj metav1.ObjectMeta) (*expclusterv1.MachinePool, error) { - for _, ref := range obj.OwnerReferences { - if ref.Kind != "MachinePool" { - continue - } - gv, err := schema.ParseGroupVersion(ref.APIVersion) - if err != nil { - return nil, errors.WithStack(err) - } - if gv.Group == expclusterv1.GroupVersion.Group { - return getMachinePoolByName(ctx, c, obj.Namespace, ref.Name) - } - } - return nil, nil -} - //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedmachinepools,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedmachinepools/status,verbs=get;update;patch //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedmachinepools/finalizers,verbs=update @@ -239,7 +222,7 @@ func (r *GCPManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctr } // Get the machine pool - machinePool, err := getOwnerMachinePool(ctx, r.Client, gcpManagedMachinePool.ObjectMeta) + machinePool, err := GetOwnerMachinePool(ctx, r.Client, gcpManagedMachinePool.ObjectMeta) if err != nil { log.Error(err, "Failed to retrieve owner MachinePool from the API Server") return ctrl.Result{}, err diff --git a/exp/controllers/helpers.go b/exp/controllers/helpers.go new file mode 100644 index 000000000..14c7a09a9 --- /dev/null +++ b/exp/controllers/helpers.go @@ -0,0 +1,112 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "fmt" + + "github.com/go-logr/logr" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/cluster-api-provider-gcp/util/reconciler" + kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/pkg/errors" + "sigs.k8s.io/controller-runtime/pkg/client" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" +) + +// GetOwnerMachinePool returns the MachinePool object owning the current resource. +func GetOwnerMachinePool(ctx context.Context, c client.Client, obj metav1.ObjectMeta) (*expclusterv1.MachinePool, error) { + for _, ref := range obj.OwnerReferences { + if ref.Kind != "MachinePool" { + continue + } + gv, err := schema.ParseGroupVersion(ref.APIVersion) + if err != nil { + return nil, errors.WithStack(err) + } + if gv.Group == expclusterv1.GroupVersion.Group { + return getMachinePoolByName(ctx, c, obj.Namespace, ref.Name) + } + } + return nil, nil +} + +// KubeadmConfigToInfrastructureMapFunc returns a handler.ToRequestsFunc that watches for KubeadmConfig events and returns. +func KubeadmConfigToInfrastructureMapFunc(_ context.Context, c client.Client, log logr.Logger) handler.MapFunc { + return func(ctx context.Context, o client.Object) []reconcile.Request { + ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultMappingTimeout) + defer cancel() + + kc, ok := o.(*kubeadmv1.KubeadmConfig) + if !ok { + log.V(4).Info("attempt to map incorrect type", "type", fmt.Sprintf("%T", o)) + return nil + } + + mpKey := client.ObjectKey{ + Namespace: kc.Namespace, + Name: kc.Name, + } + + // fetch MachinePool to get reference + mp := &expclusterv1.MachinePool{} + if err := c.Get(ctx, mpKey, mp); err != nil { + if !apierrors.IsNotFound(err) { + log.Error(err, "failed to fetch MachinePool for KubeadmConfig") + } + return []reconcile.Request{} + } + + ref := mp.Spec.Template.Spec.Bootstrap.ConfigRef + if ref == nil { + log.V(4).Info("fetched MachinePool has no Bootstrap.ConfigRef") + return []reconcile.Request{} + } + sameKind := ref.Kind != o.GetObjectKind().GroupVersionKind().Kind + sameName := ref.Name == kc.Name + sameNamespace := ref.Namespace == kc.Namespace + if !sameKind || !sameName || !sameNamespace { + log.V(4).Info("Bootstrap.ConfigRef does not match", + "sameKind", sameKind, + "ref kind", ref.Kind, + "other kind", o.GetObjectKind().GroupVersionKind().Kind, + "sameName", sameName, + "sameNamespace", sameNamespace, + ) + return []reconcile.Request{} + } + + key := client.ObjectKey{ + Namespace: kc.Namespace, + Name: kc.Name, + } + log.V(4).Info("adding KubeadmConfig to watch", "key", key) + + return []reconcile.Request{ + { + NamespacedName: key, + }, + } + } +} diff --git a/main.go b/main.go index b875c3643..5cce987fc 100644 --- a/main.go +++ b/main.go @@ -26,12 +26,21 @@ import ( "os" "time" + kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" + // +kubebuilder:scaffold:imports "github.com/spf13/pflag" "k8s.io/apimachinery/pkg/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" cgrecord "k8s.io/client-go/tools/record" "k8s.io/klog/v2" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + capifeature "sigs.k8s.io/cluster-api/feature" + "sigs.k8s.io/cluster-api/util/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/controller" + infrav1beta1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1" "sigs.k8s.io/cluster-api-provider-gcp/controllers" infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" @@ -39,13 +48,8 @@ import ( "sigs.k8s.io/cluster-api-provider-gcp/feature" "sigs.k8s.io/cluster-api-provider-gcp/util/reconciler" "sigs.k8s.io/cluster-api-provider-gcp/version" - clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" - expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/util/flags" - "sigs.k8s.io/cluster-api/util/record" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" - "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/webhook" ) @@ -61,8 +65,12 @@ func init() { _ = clientgoscheme.AddToScheme(scheme) _ = infrav1beta1.AddToScheme(scheme) _ = clusterv1.AddToScheme(scheme) - _ = expclusterv1.AddToScheme(scheme) + _ = infrav1exp.AddToScheme(scheme) + _ = clientgoscheme.AddToScheme(scheme) + _ = expclusterv1.AddToScheme(scheme) + _ = kubeadmv1.AddToScheme(scheme) + // +kubebuilder:scaffold:scheme } @@ -226,6 +234,16 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) error { } } + if feature.Gates.Enabled(capifeature.MachinePool) { + setupLog.Info("Enabling MachinePool reconcilers") + + if err := (&expcontrollers.GCPMachinePoolReconciler{ + Client: mgr.GetClient(), + }).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: gcpMachineConcurrency}); err != nil { + return fmt.Errorf("setting up GCPMachinePool controller: %w", err) + } + } + return nil } @@ -257,6 +275,10 @@ func setupWebhooks(mgr ctrl.Manager) error { } } + if feature.Gates.Enabled(capifeature.MachinePool) { + setupLog.Info("Enabling MachinePool webhooks") + } + return nil }