From fe366adc63547edf23a6c512f5d177d9736afa29 Mon Sep 17 00:00:00 2001
From: Samuel Stuchly <sssstuchly@gmail.com>
Date: Wed, 15 Sep 2021 16:34:10 +0200
Subject: [PATCH] add support for GPUs on GCP

---
 .../v1beta1/gcpmachineproviderconfig_types.go |  39 ++++--
 .../v1beta1/zz_generated.deepcopy.go          |  31 +++++
 pkg/cloud/gcp/actuators/machine/reconciler.go | 122 +++++++++++++++++-
 .../services/compute/computeservice.go        |  40 +++++-
 .../services/compute/computeservice_mock.go   |  13 ++
 5 files changed, 225 insertions(+), 20 deletions(-)

diff --git a/pkg/apis/gcpprovider/v1beta1/gcpmachineproviderconfig_types.go b/pkg/apis/gcpprovider/v1beta1/gcpmachineproviderconfig_types.go
index 04aa60f95..d31343d22 100644
--- a/pkg/apis/gcpprovider/v1beta1/gcpmachineproviderconfig_types.go
+++ b/pkg/apis/gcpprovider/v1beta1/gcpmachineproviderconfig_types.go
@@ -22,22 +22,26 @@ type GCPMachineProviderSpec struct {
 	// CredentialsSecret is a reference to the secret with GCP credentials.
 	CredentialsSecret *corev1.LocalObjectReference `json:"credentialsSecret,omitempty"`
 
-	CanIPForward       bool                   `json:"canIPForward"`
-	DeletionProtection bool                   `json:"deletionProtection"`
-	Disks              []*GCPDisk             `json:"disks,omitempty"`
-	Labels             map[string]string      `json:"labels,omitempty"`
-	Metadata           []*GCPMetadata         `json:"gcpMetadata,omitempty"`
-	NetworkInterfaces  []*GCPNetworkInterface `json:"networkInterfaces,omitempty"`
-	ServiceAccounts    []GCPServiceAccount    `json:"serviceAccounts"`
-	Tags               []string               `json:"tags,omitempty"`
-	TargetPools        []string               `json:"targetPools,omitempty"`
-	MachineType        string                 `json:"machineType"`
-	Region             string                 `json:"region"`
-	Zone               string                 `json:"zone"`
-	ProjectID          string                 `json:"projectID,omitempty"`
+	CanIPForward       bool                    `json:"canIPForward"`
+	DeletionProtection bool                    `json:"deletionProtection"`
+	Disks              []*GCPDisk              `json:"disks,omitempty"`
+	Labels             map[string]string       `json:"labels,omitempty"`
+	Metadata           []*GCPMetadata          `json:"gcpMetadata,omitempty"`
+	NetworkInterfaces  []*GCPNetworkInterface  `json:"networkInterfaces,omitempty"`
+	ServiceAccounts    []GCPServiceAccount     `json:"serviceAccounts"`
+	Tags               []string                `json:"tags,omitempty"`
+	TargetPools        []string                `json:"targetPools,omitempty"`
+	MachineType        string                  `json:"machineType"`
+	Region             string                  `json:"region"`
+	Zone               string                  `json:"zone"`
+	ProjectID          string                  `json:"projectID,omitempty"`
+	GuestAccelerators  []*GCPAcceleratorConfig `json:"guestAccelerators,omitempty"`
 
 	// Preemptible indicates if created instance is preemptible
 	Preemptible bool `json:"preemptible,omitempty"`
+
+	OnHostMaintenance string `json:"onHostMaintenance,omitempty"`
+	AutomaticRestart  *bool  `json:"automaticRestart,omitempty"`
 }
 
 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -104,3 +108,12 @@ type GCPKMSKeyReference struct {
 	// Location is the GCP location in which the Key Ring exists.
 	Location string `json:"location"`
 }
+
+// GCPAcceleratorConfig describes the type and count of accelerator cards attached to an instance on GCP.
+type GCPAcceleratorConfig struct {
+	// AcceleratorCount is the number of accelerators (GPUs) of type AcceleratorType to be attached to an instance.
+	AcceleratorCount int64 `json:"acceleratorCount,omitempty"`
+	// AcceleratorType is the type of accelerator (GPU) to be attached to an instance.
+	// Supported accelerator types are: nvidia-tesla-k80, nvidia-tesla-p100, nvidia-tesla-v100, nvidia-tesla-a100, nvidia-tesla-p4, nvidia-tesla-t4.
+	AcceleratorType string `json:"acceleratorType,omitempty"`
+}
diff --git a/pkg/apis/gcpprovider/v1beta1/zz_generated.deepcopy.go b/pkg/apis/gcpprovider/v1beta1/zz_generated.deepcopy.go
index 9f3d6d443..ce0a6c919 100644
--- a/pkg/apis/gcpprovider/v1beta1/zz_generated.deepcopy.go
+++ b/pkg/apis/gcpprovider/v1beta1/zz_generated.deepcopy.go
@@ -25,6 +25,21 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 )
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *GCPAcceleratorConfig) DeepCopyInto(out *GCPAcceleratorConfig) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPAcceleratorConfig.
+func (in *GCPAcceleratorConfig) DeepCopy() *GCPAcceleratorConfig {
+	if in == nil {
+		return nil
+	}
+	out := new(GCPAcceleratorConfig)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *GCPDisk) DeepCopyInto(out *GCPDisk) {
 	*out = *in
@@ -176,6 +191,22 @@ func (in *GCPMachineProviderSpec) DeepCopyInto(out *GCPMachineProviderSpec) {
 		*out = make([]string, len(*in))
 		copy(*out, *in)
 	}
+	if in.GuestAccelerators != nil {
+		in, out := &in.GuestAccelerators, &out.GuestAccelerators
+		*out = make([]*GCPAcceleratorConfig, len(*in))
+		for i := range *in {
+			if (*in)[i] != nil {
+				in, out := &(*in)[i], &(*out)[i]
+				*out = new(GCPAcceleratorConfig)
+				**out = **in
+			}
+		}
+	}
+	if in.AutomaticRestart != nil {
+		in, out := &in.AutomaticRestart, &out.AutomaticRestart
+		*out = new(bool)
+		**out = **in
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachineProviderSpec.
diff --git a/pkg/cloud/gcp/actuators/machine/reconciler.go b/pkg/cloud/gcp/actuators/machine/reconciler.go
index fb5531367..aa793633b 100644
--- a/pkg/cloud/gcp/actuators/machine/reconciler.go
+++ b/pkg/cloud/gcp/actuators/machine/reconciler.go
@@ -3,10 +3,9 @@ package machine
 import (
 	"context"
 	"fmt"
-	"time"
-
 	"strconv"
 	"strings"
+	"time"
 
 	"github.com/openshift/cluster-api-provider-gcp/pkg/apis/gcpprovider/v1beta1"
 	machinev1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1"
@@ -25,6 +24,8 @@ const (
 	requeueAfterSeconds = 20
 	instanceLinkFmt     = "https://www.googleapis.com/compute/v1/projects/%s/zones/%s/instances/%s"
 	kmsKeyNameFmt       = "projects/%s/locations/%s/keyRings/%s/cryptoKeys/%s"
+	machineTypeFmt      = "zones/%s/machineTypes/%s"
+	acceleratorTypeFmt  = "zones/%s/acceleratorTypes/%s"
 )
 
 // Reconciler are list of services required by machine actuator, easy to create a fake
@@ -39,6 +40,98 @@ func newReconciler(scope *machineScope) *Reconciler {
 	}
 }
 
+// supportedGpuTypes maps every accelerator type accepted in the provider spec
+// to the name of the regional quota metric that limits it.
+var supportedGpuTypes = map[string]string{
+	"nvidia-tesla-k80":  "NVIDIA_K80_GPUS",
+	"nvidia-tesla-p100": "NVIDIA_P100_GPUS",
+	"nvidia-tesla-v100": "NVIDIA_V100_GPUS",
+	"nvidia-tesla-a100": "NVIDIA_A100_GPUS",
+	"nvidia-tesla-p4":   "NVIDIA_P4_GPUS",
+	"nvidia-tesla-t4":   "NVIDIA_T4_GPUS",
+}
+
+// containsString reports whether str is an element of sli.
+func containsString(sli []string, str string) bool {
+	found := false
+	for i := 0; i < len(sli) && !found; i++ {
+		found = sli[i] == str
+	}
+	return found
+}
+
+// checkQuota validates the single accelerator request of the machine: the accelerator type must exist in
+// the zone, must be one of supportedGpuTypes, and its count must fit into the remaining regional quota.
+// A non-zero machineTypeAcceleratorCount is the number of nvidia-tesla-a100 GPUs pre-attached to an A2
+// machine type; zero means the accelerator is taken from the providerSpec GuestAccelerators instead.
+func (r *Reconciler) checkQuota(machineTypeAcceleratorCount int64) error {
+	region, err := r.computeService.RegionGet(r.projectID, r.providerSpec.Region)
+	if err != nil {
+		return machinecontroller.InvalidMachineConfiguration(fmt.Sprintf("Failed to get region %s via compute service: %v", r.providerSpec.Region, err))
+	}
+	// A2 machine types carry their own nvidia-tesla-a100 cards and accept no additional guest
+	// accelerators, so the providerSpec GuestAccelerators are only consulted for other families.
+	guestAccelerators := r.providerSpec.GuestAccelerators
+	if machineTypeAcceleratorCount != 0 {
+		guestAccelerators = []*v1beta1.GCPAcceleratorConfig{{AcceleratorType: "nvidia-tesla-a100", AcceleratorCount: machineTypeAcceleratorCount}}
+	}
+	// GCP accepts at most one guestAccelerators entry per instance (a second one is rejected with
+	// googleapi Error 413 fieldSizeTooLarge), so only the first entry is validated.
+	if len(guestAccelerators) == 0 || guestAccelerators[0] == nil {
+		return machinecontroller.InvalidMachineConfiguration("No guest accelerator provided to validate.")
+	}
+	accelerator := guestAccelerators[0]
+	_, err = r.computeService.AcceleratorTypeGet(r.projectID, r.providerSpec.Zone, accelerator.AcceleratorType)
+	if err != nil {
+		return machinecontroller.InvalidMachineConfiguration(fmt.Sprintf("AcceleratorType %s not available in the zone %s : %v", accelerator.AcceleratorType, r.providerSpec.Zone, err))
+	}
+	metric := supportedGpuTypes[accelerator.AcceleratorType]
+	if metric == "" {
+		return machinecontroller.InvalidMachineConfiguration(fmt.Sprintf("Unsupported accelerator type %s", accelerator.AcceleratorType))
+	}
+	// preemptible instances have separate quota
+	if r.providerSpec.Preemptible {
+		metric = "PREEMPTIBLE_" + metric
+	}
+	// Return from inside the loop on a match so that an empty quota list also ends in "No quota found".
+	for _, q := range region.Quotas {
+		if q.Metric != metric {
+			continue
+		}
+		if int64(q.Usage)+accelerator.AcceleratorCount > int64(q.Limit) {
+			return machinecontroller.InvalidMachineConfiguration(fmt.Sprintf("Quota exceeded. Metric: %s. Usage: %v. Limit: %v.", metric, q.Usage, q.Limit))
+		}
+		return nil
+	}
+	return machinecontroller.InvalidMachineConfiguration(fmt.Sprintf("No quota found. Metric: %s.", metric))
+}
+
+// validateGuestAccelerators checks that the machine type can carry the requested accelerators and that quota allows them.
+// It returns nil when nothing is requested, and an InvalidMachineConfiguration error when validation fails.
+func (r *Reconciler) validateGuestAccelerators() error {
+	machineType := r.providerSpec.MachineType
+	isA2 := strings.HasPrefix(machineType, "a2-")
+	hasGuestAccelerators := len(r.providerSpec.GuestAccelerators) > 0
+	switch {
+	case !hasGuestAccelerators && !isA2: // no accelerators to validate
+		return nil
+	case hasGuestAccelerators && isA2:
+		return machinecontroller.InvalidMachineConfiguration("A2 Machine types have pre-attached guest accelerators. Adding additional guest accelerators is not supported")
+	case !isA2 && !strings.HasPrefix(machineType, "n1-"):
+		return machinecontroller.InvalidMachineConfiguration(fmt.Sprintf("MachineType %s does not support accelerators. Only A2 and N1 machine type families support guest accelerators.", machineType))
+	}
+	// Query with r.projectID, the project checkQuota uses as well; providerSpec.ProjectID is tagged omitempty and may be unset.
+	a2MachineFamily, n1MachineFamily := r.computeService.GPUCompatibleMachineTypesList(r.projectID, r.providerSpec.Zone, r.Context)
+	switch {
+	case a2MachineFamily[machineType] != 0: // A2 family: fixed type and count of GPUs
+		return r.checkQuota(a2MachineFamily[machineType])
+	case containsString(n1MachineFamily, machineType): // N1 family: accelerators come from the providerSpec
+		return r.checkQuota(0)
+	default: // machine type not listed for the zone
+		return machinecontroller.InvalidMachineConfiguration(fmt.Sprintf("MachineType %s is not available in the zone %s.", machineType, r.providerSpec.Zone))
+	}
+}
+
 // Create creates machine if and only if machine exists, handled by cluster-api
 func (r *Reconciler) create() error {
 	if err := validateMachine(*r.machine, *r.providerSpec); err != nil {
@@ -50,16 +143,35 @@ func (r *Reconciler) create() error {
 		CanIpForward:       r.providerSpec.CanIPForward,
 		DeletionProtection: r.providerSpec.DeletionProtection,
 		Labels:             r.providerSpec.Labels,
-		MachineType:        fmt.Sprintf("zones/%s/machineTypes/%s", zone, r.providerSpec.MachineType),
+		MachineType:        fmt.Sprintf(machineTypeFmt, zone, r.providerSpec.MachineType),
 		Name:               r.machine.Name,
 		Tags: &compute.Tags{
 			Items: r.providerSpec.Tags,
 		},
 		Scheduling: &compute.Scheduling{
-			Preemptible: r.providerSpec.Preemptible,
+			Preemptible:       r.providerSpec.Preemptible,
+			AutomaticRestart:  r.providerSpec.AutomaticRestart,
+			OnHostMaintenance: r.providerSpec.OnHostMaintenance,
 		},
 	}
 
+	var guestAccelerators = []*compute.AcceleratorConfig{}
+
+	if l := len(r.providerSpec.GuestAccelerators); l == 1 {
+		guestAccelerators = append(guestAccelerators, &compute.AcceleratorConfig{
+			AcceleratorType:  fmt.Sprintf(acceleratorTypeFmt, zone, r.providerSpec.GuestAccelerators[0].AcceleratorType),
+			AcceleratorCount: r.providerSpec.GuestAccelerators[0].AcceleratorCount,
+		})
+	} else if l > 1 {
+		return machinecontroller.InvalidMachineConfiguration("More than one type of accelerator provided. Instances support only one accelerator type at a time.")
+	}
+
+	instance.GuestAccelerators = guestAccelerators
+
+	if err := r.validateGuestAccelerators(); err != nil {
+		return err
+	}
+
 	if instance.Labels == nil {
 		instance.Labels = map[string]string{}
 	}
@@ -70,7 +182,7 @@ func (r *Reconciler) create() error {
 	for _, disk := range r.providerSpec.Disks {
 		srcImage := disk.Image
 		if !strings.Contains(disk.Image, "/") {
-			// only image name provided therfore defaulting to the current project
+			// only image name provided therefore defaulting to the current project
 			srcImage = googleapi.ResolveRelative(r.computeService.BasePath(), fmt.Sprintf("%s/global/images/%s", r.projectID, disk.Image))
 		}
 
diff --git a/pkg/cloud/gcp/actuators/services/compute/computeservice.go b/pkg/cloud/gcp/actuators/services/compute/computeservice.go
index 13dd4bea7..59fcc3082 100644
--- a/pkg/cloud/gcp/actuators/services/compute/computeservice.go
+++ b/pkg/cloud/gcp/actuators/services/compute/computeservice.go
@@ -2,11 +2,14 @@ package computeservice
 
 import (
 	"context"
+	"log"
+	"strings"
 
-	"github.com/openshift/cluster-api-provider-gcp/pkg/version"
 	"golang.org/x/oauth2/google"
-	"google.golang.org/api/compute/v1"
 	"google.golang.org/api/option"
+
+	"github.com/openshift/cluster-api-provider-gcp/pkg/version"
+	"google.golang.org/api/compute/v1"
 )
 
 // GCPComputeService is a pass through wrapper for google.golang.org/api/compute/v1/compute
@@ -22,6 +25,9 @@ type GCPComputeService interface {
 	TargetPoolsAddInstance(project string, region string, name string, instance string) (*compute.Operation, error)
 	TargetPoolsRemoveInstance(project string, region string, name string, instance string) (*compute.Operation, error)
 	MachineTypesGet(project string, machineType string, zone string) (*compute.MachineType, error)
+	RegionGet(project string, region string) (*compute.Region, error)
+	GPUCompatibleMachineTypesList(project string, zone string, ctx context.Context) (map[string]int64, []string)
+	AcceleratorTypeGet(project string, zone string, acceleratorType string) (*compute.AcceleratorType, error)
 }
 
 type computeService struct {
@@ -106,3 +112,33 @@ func (c *computeService) TargetPoolsRemoveInstance(project string, region string
 func (c *computeService) MachineTypesGet(project string, zone string, machineType string) (*compute.MachineType, error) {
 	return c.service.MachineTypes.Get(project, zone, machineType).Do()
 }
+
+// GPUCompatibleMachineTypesList lists the machine types available in the zone and returns the A2 family as a
+// map of machine type name to guest accelerator count, and the N1 family as a slice of machine type names.
+func (c *computeService) GPUCompatibleMachineTypesList(project string, zone string, ctx context.Context) (map[string]int64, []string) {
+	a2MachineFamily := map[string]int64{}
+	var n1MachineFamily []string
+	req := c.service.MachineTypes.List(project, zone)
+	if err := req.Pages(ctx, func(page *compute.MachineTypeList) error {
+		for _, machineType := range page.Items {
+			// An A2 entry without accelerator data is skipped rather than indexed, which would panic.
+			if strings.HasPrefix(machineType.Name, "a2") && len(machineType.Accelerators) > 0 {
+				a2MachineFamily[machineType.Name] = machineType.Accelerators[0].GuestAcceleratorCount
+			} else if strings.HasPrefix(machineType.Name, "n1") {
+				n1MachineFamily = append(n1MachineFamily, machineType.Name)
+			}
+		}
+		return nil
+	}); err != nil {
+		log.Fatal(err) // NOTE(review): exits the whole process; returning the error instead needs an interface change
+	}
+	return a2MachineFamily, n1MachineFamily
+}
+
+func (c *computeService) AcceleratorTypeGet(project string, zone string, acceleratorType string) (*compute.AcceleratorType, error) {
+	return c.service.AcceleratorTypes.Get(project, zone, acceleratorType).Do()
+}
+
+func (c *computeService) RegionGet(project string, region string) (*compute.Region, error) {
+	return c.service.Regions.Get(project, region).Do()
+}
diff --git a/pkg/cloud/gcp/actuators/services/compute/computeservice_mock.go b/pkg/cloud/gcp/actuators/services/compute/computeservice_mock.go
index a0993d826..73c5b3a3b 100644
--- a/pkg/cloud/gcp/actuators/services/compute/computeservice_mock.go
+++ b/pkg/cloud/gcp/actuators/services/compute/computeservice_mock.go
@@ -1,6 +1,8 @@
 package computeservice
 
 import (
+	"context"
+
 	compute "google.golang.org/api/compute/v1"
 	"google.golang.org/api/googleapi"
 )
@@ -129,3 +131,14 @@ func MockBuilderFuncTypeNotFound(serviceAccountJSON string) (GCPComputeService,
 	}
 	return computeSvc, nil
 }
+
+func (c *GCPComputeServiceMock) RegionGet(project string, region string) (*compute.Region, error) {
+	return nil, nil
+}
+
+func (c *GCPComputeServiceMock) GPUCompatibleMachineTypesList(project string, zone string, ctx context.Context) (map[string]int64, []string) {
+	return nil, nil
+}
+func (c *GCPComputeServiceMock) AcceleratorTypeGet(project string, zone string, acceleratorType string) (*compute.AcceleratorType, error) {
+	return nil, nil
+}