From 9a013cea2570c4ea27b8f45c09b104c154af0f06 Mon Sep 17 00:00:00 2001
From: Jiahui <4543bxy@gmail.com>
Date: Mon, 31 Jul 2023 10:23:53 +0800
Subject: [PATCH] feat: adapt for gpu cost (#3596)

* add GPU monitoring

* add node role

* add persistentvolumeclaims role

* make payment-secret optional;
create the namespace first in the account Kubefile;
hide useless monitor logs

* fix monitoring nil pointer error

* stop lowercasing price property keys
---
 controllers/account/deploy/Kubefile           |   2 +-
 .../account/deploy/manifests/deploy.yaml      |   1 +
 controllers/pkg/common/gpu/nvidia.go          | 120 ++++++++++++++++++
 controllers/pkg/common/resources.go           |   9 ++
 controllers/pkg/database/mongodb.go           |   3 +-
 controllers/resources/config/rbac/role.yaml   |  16 +++
 .../controllers/monitor_controller.go         | 116 +++++++++++------
 .../resources/deploy/manifests/deploy.yaml    |  16 +++
 8 files changed, 239 insertions(+), 44 deletions(-)
 create mode 100644 controllers/pkg/common/gpu/nvidia.go

diff --git a/controllers/account/deploy/Kubefile b/controllers/account/deploy/Kubefile
index 9e917de2b10..17f63c8677f 100644
--- a/controllers/account/deploy/Kubefile
+++ b/controllers/account/deploy/Kubefile
@@ -8,4 +8,4 @@ COPY manifests manifests
 ENV DEFAULT_NAMESPACE account-system
 ENV MONGO_URI "mongodb://mongo:27017/resources"
 
-CMD ["( kubectl create -f manifests/mongo-secret.yaml -n $DEFAULT_NAMESPACE || true ) && kubectl apply -f manifests/deploy.yaml -n $DEFAULT_NAMESPACE"]
+CMD ["( kubectl create ns $DEFAULT_NAMESPACE || true ) && ( kubectl create -f manifests/mongo-secret.yaml -n $DEFAULT_NAMESPACE || true ) && kubectl apply -f manifests/deploy.yaml -n $DEFAULT_NAMESPACE"]
diff --git a/controllers/account/deploy/manifests/deploy.yaml b/controllers/account/deploy/manifests/deploy.yaml
index 612523ae80b..6e56cb4738f 100644
--- a/controllers/account/deploy/manifests/deploy.yaml
+++ b/controllers/account/deploy/manifests/deploy.yaml
@@ -1168,6 +1168,7 @@ spec:
         envFrom:
         - secretRef:
             name: payment-secret
+            optional: true
         image: ghcr.io/labring/sealos-account-controller:latest
         imagePullPolicy: Always
         livenessProbe:
diff --git a/controllers/pkg/common/gpu/nvidia.go b/controllers/pkg/common/gpu/nvidia.go
new file mode 100644
index 00000000000..1b47889af93
--- /dev/null
+++ b/controllers/pkg/common/gpu/nvidia.go
@@ -0,0 +1,120 @@
+package gpu
+
+import (
+	"context"
+
+	corev1 "k8s.io/api/core/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// NVIDIA node label keys (nvidia.com/*); NvidiaGpuKey is also used as the GPU resource name.
+const (
+	NvidiaGpuKey                          = "nvidia.com/gpu"
+	NvidiaCudaDriverMajorKey              = "nvidia.com/cuda.driver.major"
+	NvidiaCudaDriverMinorKey              = "nvidia.com/cuda.driver.minor"
+	NvidiaCudaDriverRevKey                = "nvidia.com/cuda.driver.rev"
+	NvidiaCudaRuntimeMajorKey             = "nvidia.com/cuda.runtime.major"
+	NvidiaCudaRuntimeMinorKey             = "nvidia.com/cuda.runtime.minor"
+	NvidiaGfdTimestampKey                 = "nvidia.com/gfd.timestamp"
+	NvidiaGpuComputeMajorKey              = "nvidia.com/gpu.compute.major"
+	NvidiaGpuComputeMinorKey              = "nvidia.com/gpu.compute.minor"
+	NvidiaGpuCountKey                     = "nvidia.com/gpu.count"
+	NvidiaGpuDeployContainerToolkitKey    = "nvidia.com/gpu.deploy.container-toolkit"
+	NvidiaGpuDeployDcgmKey                = "nvidia.com/gpu.deploy.dcgm"
+	NvidiaGpuDeployDcgmExporterKey        = "nvidia.com/gpu.deploy.dcgm-exporter"
+	NvidiaGpuDeployDevicePluginKey        = "nvidia.com/gpu.deploy.device-plugin"
+	NvidiaGpuDeployDriverKey              = "nvidia.com/gpu.deploy.driver"
+	NvidiaGpuDeployGpuFeatureDiscoveryKey = "nvidia.com/gpu.deploy.gpu-feature-discovery"
+	NvidiaGpuDeployNodeStatusExporterKey  = "nvidia.com/gpu.deploy.node-status-exporter"
+	NvidiaGpuDeployOperatorValidatorKey   = "nvidia.com/gpu.deploy.operator-validator"
+	NvidiaGpuFamilyKey                    = "nvidia.com/gpu.family"
+	NvidiaGpuMachineKey                   = "nvidia.com/gpu.machine"
+	NvidiaGpuMemoryKey                    = "nvidia.com/gpu.memory"
+	NvidiaGpuPresentKey                   = "nvidia.com/gpu.present"
+	NvidiaGpuProductKey                   = "nvidia.com/gpu.product"
+	NvidiaGpuReplicasKey                  = "nvidia.com/gpu.replicas"
+	NvidiaMigCapableKey                   = "nvidia.com/mig.capable"
+	NvidiaMigStrategyKey                  = "nvidia.com/mig.strategy"
+)
+
+type NvidiaGPU struct {
+	GpuInfo    Information
+	CudaInfo   CudaInformation
+	GpuDeploy  Deployment
+	GpuDetails DetailInformation
+	MigInfo    MigInformation
+}
+
+type Information struct {
+	Gpu         string
+	GpuCount    string
+	GpuPresent  string
+	GpuProduct  string
+	GpuReplicas string
+}
+
+type CudaInformation struct {
+	CudaDriverMajor  string
+	CudaDriverMinor  string
+	CudaDriverRev    string
+	CudaRuntimeMajor string
+	CudaRuntimeMinor string
+}
+
+type Deployment struct {
+	GpuDeployContainerToolkit    string
+	GpuDeployDcgm                string
+	GpuDeployDcgmExporter        string
+	GpuDeployDevicePlugin        string
+	GpuDeployDriver              string
+	GpuDeployGpuFeatureDiscovery string
+	GpuDeployNodeStatusExporter  string
+	GpuDeployOperatorValidator   string
+}
+
+type DetailInformation struct {
+	GpuComputeMajor string
+	GpuComputeMinor string
+	GpuFamily       string
+	GpuMachine      string
+	GpuMemory       string
+	GfdTimestamp    string
+}
+
+type MigInformation struct {
+	MigCapable  string
+	MigStrategy string
+}
+
+// GetNodeGpuModel lists nodes through c and returns, keyed by node name, the
+// NvidiaGPU built from each node's nvidia.com/* labels (absent labels yield "").
+func GetNodeGpuModel(c client.Client) (map[string]NvidiaGPU, error) {
+	nodeList := &corev1.NodeList{}
+	err := c.List(context.Background(), nodeList)
+	if err != nil {
+		return nil, err
+	}
+
+	gpuModels := make(map[string]NvidiaGPU)
+	for _, node := range nodeList.Items {
+		gpu := NvidiaGPU{
+			GpuInfo: Information{
+				Gpu:         node.Labels[NvidiaGpuKey],
+				GpuCount:    node.Labels[NvidiaGpuCountKey],
+				GpuPresent:  node.Labels[NvidiaGpuPresentKey],
+				GpuProduct:  node.Labels[NvidiaGpuProductKey],
+				GpuReplicas: node.Labels[NvidiaGpuReplicasKey],
+			},
+			CudaInfo: CudaInformation{
+				CudaDriverMajor:  node.Labels[NvidiaCudaDriverMajorKey],
+				CudaDriverMinor:  node.Labels[NvidiaCudaDriverMinorKey],
+				CudaDriverRev:    node.Labels[NvidiaCudaDriverRevKey],
+				CudaRuntimeMajor: node.Labels[NvidiaCudaRuntimeMajorKey],
+				CudaRuntimeMinor: node.Labels[NvidiaCudaRuntimeMinorKey],
+			},
+			// NOTE: GpuDeploy, GpuDetails and MigInfo are not populated here and stay zero-valued.
+		}
+		gpuModels[node.Name] = gpu
+	}
+	return gpuModels, nil
+}
diff --git a/controllers/pkg/common/resources.go b/controllers/pkg/common/resources.go
index 71e8a2b9364..8609c04fbe5 100644
--- a/controllers/pkg/common/resources.go
+++ b/controllers/pkg/common/resources.go
@@ -6,6 +6,8 @@ import (
 	"math"
 	"time"
 
+	"github.com/labring/sealos/controllers/pkg/common/gpu"
+
 	"go.mongodb.org/mongo-driver/bson"
 	"go.mongodb.org/mongo-driver/mongo"
 
@@ -112,6 +114,12 @@ const (
 	PropertyInfraDisk   = "infra-disk"
 )
 
+const ResourceGPU corev1.ResourceName = gpu.NvidiaGpuKey
+
+func NewGpuResource(product string) corev1.ResourceName {
+	return corev1.ResourceName("gpu-" + product)
+}
+
 var (
 	bin1Mi  = resource.NewQuantity(1<<20, resource.BinarySI)
 	cpuUnit = resource.MustParse("1m")
@@ -119,6 +127,7 @@ var (
 
 var PricesUnit = map[corev1.ResourceName]*resource.Quantity{
 	corev1.ResourceCPU:     &cpuUnit, // 1 m CPU (1000 μ)
+	ResourceGPU:            &cpuUnit, // 1 m GPU (shares cpuUnit)
 	corev1.ResourceMemory:  bin1Mi,   // 1 MiB
 	corev1.ResourceStorage: bin1Mi,   // 1 MiB
 }
diff --git a/controllers/pkg/database/mongodb.go b/controllers/pkg/database/mongodb.go
index a81e5d9774c..38199de61a0 100644
--- a/controllers/pkg/database/mongodb.go
+++ b/controllers/pkg/database/mongodb.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"fmt"
 	"math"
-	"strings"
 	"time"
 
 	"github.com/labring/sealos/controllers/pkg/crypto"
@@ -197,7 +196,7 @@ func (m *MongoDB) GetAllPricesMap() (map[string]common.Price, error) {
 		if err != nil {
 			return nil, fmt.Errorf("decrypt price error: %v", err)
 		}
-		pricesMap[strings.ToLower(prices[i].Property)] = common.Price{
+		pricesMap[prices[i].Property] = common.Price{
 			Price:    price,
 			Detail:   prices[i].Detail,
 			Property: prices[i].Property,
diff --git a/controllers/resources/config/rbac/role.yaml b/controllers/resources/config/rbac/role.yaml
index a0d28a48623..9b1c44d0798 100644
--- a/controllers/resources/config/rbac/role.yaml
+++ b/controllers/resources/config/rbac/role.yaml
@@ -13,6 +13,22 @@ rules:
   - get
   - list
   - watch
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - persistentvolumeclaims
+  verbs:
+  - get
+  - list
+  - watch
 - apiGroups:
   - ""
   resources:
diff --git a/controllers/resources/controllers/monitor_controller.go b/controllers/resources/controllers/monitor_controller.go
index 4ca7c540a68..d82164d1f30 100644
--- a/controllers/resources/controllers/monitor_controller.go
+++ b/controllers/resources/controllers/monitor_controller.go
@@ -25,6 +25,8 @@ import (
 	"sync"
 	"time"
 
+	"github.com/labring/sealos/controllers/pkg/common/gpu"
+
 	"golang.org/x/sync/semaphore"
 
 	"github.com/labring/sealos/pkg/utils/logger"
@@ -34,7 +36,6 @@ import (
 	meteringv1 "github.com/labring/sealos/controllers/metering/api/v1"
 	"github.com/labring/sealos/controllers/pkg/common"
 	"github.com/labring/sealos/controllers/pkg/database"
-	v1 "github.com/labring/sealos/controllers/user/api/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -52,6 +53,7 @@ type MonitorReconciler struct {
 	stopCh            chan struct{}
 	wg                sync.WaitGroup
 	periodicReconcile time.Duration
+	NvidiaGpu         map[string]gpu.NvidiaGPU
 }
 
 type quantity struct {
@@ -67,6 +69,8 @@ const (
 
 var namespaceMonitorFuncs = make(map[string]func(ctx context.Context, dbClient database.Interface, namespace *corev1.Namespace) error)
 
+//+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch
+//+kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch
 //+kubebuilder:rbac:groups=core,resources=namespaces,verbs=get;list;watch
 //+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch
 //+kubebuilder:rbac:groups=core,resources=resourcequotas,verbs=get;list;watch
@@ -87,9 +91,15 @@ func NewMonitorReconciler(mgr ctrl.Manager) (*MonitorReconciler, error) {
 		return nil, fmt.Errorf("mongo uri is empty")
 	}
 	r.initNamespaceFuncs()
-	if err := r.preApply(); err != nil {
+	err := r.preApply()
+	if err != nil {
 		return nil, err
 	}
+	r.NvidiaGpu, err = gpu.GetNodeGpuModel(mgr.GetClient())
+	if err != nil {
+		return nil, fmt.Errorf("failed to get node gpu model: %v", err)
+	}
+	r.Logger.Info("get gpu model", "gpu model", r.NvidiaGpu)
 	r.startPeriodicReconcile()
 	return r, nil
 }
@@ -268,27 +278,24 @@ func (r *MonitorReconciler) podResourceUsage(ctx context.Context, dbClient datab
 		return err
 	}
 	rs := initResources()
+	hasStorageQuota := false
 	if err := r.Get(ctx, client.ObjectKey{Name: meteringv1.ResourceQuotaPrefix + namespace.Name, Namespace: namespace.Name}, &quota); err != nil {
 		if client.IgnoreNotFound(err) != nil {
 			return err
 		}
-		if _, ok := namespace.GetAnnotations()[v1.UserAnnotationCreatorKey]; ok {
-			//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=get;list;watch;create;update;patch;delete
-			//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles,verbs=get;list;watch;create;update;patch;delete
-			//if err = r.syncResourceQuota(ctx, namespace.Name); err != nil {
-			//	r.Logger.Error(err, "sync resource quota failed", "namespace", namespace.Name)
-			//}
-			r.Logger.Error(fmt.Errorf("resources quota is empty"), "", "namespace", namespace.Name)
-		}
+		//if _, ok := namespace.GetAnnotations()[v1.UserAnnotationOwnerKey]; ok {
+		//	r.Logger.Error(fmt.Errorf("resources quota is empty"), "", "namespace", namespace.Name)
+		//}
 		rs[corev1.ResourceStorage].detail = "no resource quota"
 	} else {
+		hasStorageQuota = true
 		rs[corev1.ResourceStorage].Add(*quota.Status.Used.Name("requests.storage", resource.BinarySI))
 	}
 	for _, pod := range podList.Items {
 		// TODO pending status need skip?
-		if pod.Status.Phase != corev1.PodRunning /*&& pod.Status.Phase != corev1.PodPending*/ {
-			continue
-		}
+		//if pod.Status.Phase != corev1.PodRunning /*&& pod.Status.Phase != corev1.PodPending*/ {
+		//	continue
+		//}
 		for _, container := range pod.Spec.Containers {
 			if cpuRequest, ok := container.Resources.Limits[corev1.ResourceCPU]; ok {
 				rs[corev1.ResourceCPU].Add(cpuRequest)
@@ -300,56 +307,83 @@ func (r *MonitorReconciler) podResourceUsage(ctx context.Context, dbClient datab
 			} else {
 				rs[corev1.ResourceMemory].Add(container.Resources.Requests[corev1.ResourceMemory])
 			}
+			// gpu only use limit
+			if gpuRequest, ok := container.Resources.Limits[gpu.NvidiaGpuKey]; ok {
+				gpuModel, ok := r.NvidiaGpu[pod.Spec.NodeName]
+				if !ok {
+					var err error
+					r.NvidiaGpu, err = gpu.GetNodeGpuModel(r.Client)
+					if err != nil {
+						logger.Error(err, "get node gpu model failed")
+						continue
+					}
+					gpuModel, ok = r.NvidiaGpu[pod.Spec.NodeName]
+					if !ok {
+						logger.Error(fmt.Errorf("node %s not found gpu model", pod.Spec.NodeName), "")
+						continue
+					}
+				}
+				if _, ok := rs[common.NewGpuResource(gpuModel.GpuInfo.GpuProduct)]; !ok {
+					rs[common.NewGpuResource(gpuModel.GpuInfo.GpuProduct)] = initGpuResources()
+				}
+				logger.Info("gpu request", "pod", pod.Name, "namespace", pod.Namespace, "gpu req", gpuRequest.String(), "node", pod.Spec.NodeName, "gpu model", gpuModel.GpuInfo.GpuProduct)
+				rs[common.NewGpuResource(gpuModel.GpuInfo.GpuProduct)].Add(gpuRequest)
+			}
 		}
 	}
-	cpuValue, memoryValue, storageValue := getResourceValue(corev1.ResourceCPU, rs), getResourceValue(corev1.ResourceMemory, rs), getResourceValue(corev1.ResourceStorage, rs)
-	var monitors []*common.Monitor
-	if cpuValue > 0 {
-		monitors = append(monitors, &common.Monitor{
-			Category: namespace.Name,
-			Property: corev1.ResourceCPU.String(),
-			Value:    cpuValue,
-			Time:     timeStamp,
-			Detail:   rs[corev1.ResourceCPU].String(),
-		})
-	}
-	if memoryValue > 0 {
-		monitors = append(monitors, &common.Monitor{
-			Category: namespace.Name,
-			Property: corev1.ResourceMemory.String(),
-			Value:    memoryValue,
-			Time:     timeStamp,
-			Detail:   rs[corev1.ResourceMemory].String(),
-		})
+	if !hasStorageQuota {
+		pvcList := corev1.PersistentVolumeClaimList{}
+		if err := r.List(context.Background(), &pvcList, &client.ListOptions{Namespace: namespace.Name}); err != nil {
+			return err
+		}
+		for _, pvc := range pvcList.Items {
+			if pvc.Status.Phase != corev1.ClaimBound {
+				continue
+			}
+			rs[corev1.ResourceStorage].Add(pvc.Spec.Resources.Requests[corev1.ResourceStorage])
+		}
 	}
-	if storageValue > 0 {
-		monitors = append(monitors, &common.Monitor{
-			Category: namespace.Name,
-			Property: corev1.ResourceStorage.String(),
-			Value:    storageValue,
-			Time:     timeStamp,
-			Detail:   rs[corev1.ResourceStorage].String(),
-		})
+	var monitors []*common.Monitor
+	for resour, value := range rs {
+		v := getResourceValue(resour, rs)
+		if v > 0 {
+			monitors = append(monitors, &common.Monitor{
+				Category: namespace.Name,
+				Property: resour.String(),
+				Value:    v,
+				Time:     timeStamp,
+				Detail:   value.detail,
+			})
+		}
 	}
 	return dbClient.InsertMonitor(ctx, monitors...)
 }
 
 func getResourceValue(resourceName corev1.ResourceName, res map[corev1.ResourceName]*quantity) int64 {
 	quantity := res[resourceName]
+	priceUnit := common.PricesUnit[resourceName]
+	if strings.Contains(resourceName.String(), "gpu") {
+		priceUnit = common.PricesUnit[common.ResourceGPU]
+	}
 	if quantity != nil && quantity.MilliValue() != 0 {
-		return int64(math.Ceil(float64(quantity.MilliValue()) / float64(common.PricesUnit[resourceName].MilliValue())))
+		return int64(math.Ceil(float64(quantity.MilliValue()) / float64(priceUnit.MilliValue())))
 	}
 	return 0
 }
 
 func initResources() (rs map[corev1.ResourceName]*quantity) {
 	rs = make(map[corev1.ResourceName]*quantity)
+	rs[common.ResourceGPU] = initGpuResources()
 	rs[corev1.ResourceCPU] = &quantity{Quantity: resource.NewQuantity(0, resource.DecimalSI), detail: ""}
 	rs[corev1.ResourceMemory] = &quantity{Quantity: resource.NewQuantity(0, resource.BinarySI), detail: ""}
 	rs[corev1.ResourceStorage] = &quantity{Quantity: resource.NewQuantity(0, resource.BinarySI), detail: ""}
 	return
 }
 
+func initGpuResources() *quantity {
+	return &quantity{Quantity: resource.NewQuantity(0, resource.DecimalSI), detail: ""}
+}
+
 func (r *MonitorReconciler) infraResourceUsage(ctx context.Context, dbClient database.Interface, namespace *corev1.Namespace) error {
 	var infraList infrav1.InfraList
 	if err := r.List(ctx, &infraList, client.InNamespace(namespace.Name)); err != nil {
diff --git a/controllers/resources/deploy/manifests/deploy.yaml b/controllers/resources/deploy/manifests/deploy.yaml
index 94157ee36f1..e7349be104a 100644
--- a/controllers/resources/deploy/manifests/deploy.yaml
+++ b/controllers/resources/deploy/manifests/deploy.yaml
@@ -63,6 +63,22 @@ rules:
   - get
   - list
   - watch
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - persistentvolumeclaims
+  verbs:
+  - get
+  - list
+  - watch
 - apiGroups:
   - ""
   resources: