
Commit

fix: Add SYSTEM_ARGS env and fix related issue
nkwangleiGIT committed Apr 11, 2024
1 parent 43cadf6 commit 2ef657c
Showing 9 changed files with 25 additions and 34 deletions.
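
At a glance: the controller now passes the flags it generates itself (device, revision, gpu_memory_utilization, trust-remote-code) through a new SYSTEM_ARGS environment variable, leaving EXTRA_ARGS purely for user-supplied options, and start-worker.sh appends both to the worker command line. The fastchat worker images move to the vllm-v0.4.0-hotfix tag, the Ray images to 2.9.3-py39-vllm-0.4.0, the arcadia chart version is bumped to 0.3.30, and the vLLM runner now only defaults NUMBER_GPUS from the worker's resources when the user has not set it.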
2 changes: 1 addition & 1 deletion config/samples/arcadia_v1alpha1_worker_baichuan2-7b.yaml
@@ -25,7 +25,7 @@ spec:
     image: kubeagi/minio-mc:RELEASE.2023-01-28T20-29-38Z
     imagePullPolicy: IfNotPresent
   runner:
-    image: kubeagi/arcadia-fastchat-worker:v0.2.36
+    image: kubeagi/arcadia-fastchat-worker:vllm-v0.4.0-hotfix
     imagePullPolicy: IfNotPresent
     resources:
       limits:
@@ -12,7 +12,7 @@ spec:
     image: kubeagi/minio-mc:RELEASE.2023-01-28T20-29-38Z
     imagePullPolicy: IfNotPresent
   runner:
-    image: kubeagi/arcadia-fastchat-worker:v0.2.36
+    image: kubeagi/arcadia-fastchat-worker:vllm-v0.4.0-hotfix
     imagePullPolicy: IfNotPresent
   model:
     kind: "Models"
2 changes: 1 addition & 1 deletion config/samples/arcadia_v1alpha1_worker_qwen-7b-chat.yaml
@@ -15,7 +15,7 @@ spec:
     image: kubeagi/minio-mc:RELEASE.2023-01-28T20-29-38Z
     imagePullPolicy: IfNotPresent
   runner:
-    image: kubeagi/arcadia-fastchat-worker:v0.2.36
+    image: kubeagi/arcadia-fastchat-worker:vllm-v0.4.0-hotfix
     imagePullPolicy: IfNotPresent
     resources:
       limits:
4 changes: 2 additions & 2 deletions config/samples/ray.io_v1_raycluster.yaml
@@ -18,7 +18,7 @@ spec:
         runAsGroup: 0
         fsGroup: 0
       containers:
-        - image: kubeagi/ray-ml:2.9.3-py39-vllm
+        - image: kubeagi/ray-ml:2.9.3-py39-vllm-0.4.0
          name: ray-head
          resources:
            limits:
@@ -48,7 +48,7 @@ spec:
         app.kubernetes.io/name: kuberay
     spec:
       containers:
-        - image: kubeagi/ray-ml:2.9.3-py39-vllm
+        - image: kubeagi/ray-ml:2.9.3-py39-vllm-0.4.0
          name: ray-worker
          resources:
            limits:
2 changes: 1 addition & 1 deletion deploy/charts/arcadia/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: arcadia
 description: A Helm chart(Also a KubeBB Component) for KubeAGI Arcadia
 type: application
-version: 0.3.29
+version: 0.3.30
 appVersion: "0.2.1"

 keywords:
3 changes: 0 additions & 3 deletions deploy/charts/llm-worker/Chart.yaml
@@ -30,6 +30,3 @@ sources:
 keywords:
   - kubeagi
   - LLMOps
-maintainers:
-  - name: lanture1064
-    url: https://github.com/lanture1064
2 changes: 1 addition & 1 deletion deploy/charts/llm-worker/values.yaml
@@ -5,7 +5,7 @@ image:
   repository: kubeagi/arcadia-fastchat-worker
   pullPolicy: IfNotPresent
   # Overrides the image tag whose default is the chart appVersion.
-  tag: "v0.2.0"
+  tag: "vllm-v0.4.0-hotfix"
 env:
   - name: FASTCHAT_MODEL_NAME
     value: "baichuan2-7b"
2 changes: 1 addition & 1 deletion deploy/llms/start-worker.sh
@@ -31,4 +31,4 @@ python3.9 -m $FASTCHAT_WORKER_NAME --model-names $FASTCHAT_REGISTRATION_MODEL_NAME \
     --model-path $FASTCHAT_MODEL_NAME_PATH --worker-address $FASTCHAT_WORKER_ADDRESS \
     --controller-address $FASTCHAT_CONTROLLER_ADDRESS \
     --num-gpus $NUMBER_GPUS \
-    --host 0.0.0.0 --port 21002 $EXTRA_ARGS
+    --host 0.0.0.0 --port 21002 $SYSTEM_ARGS $EXTRA_ARGS
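
Note the ordering: $SYSTEM_ARGS is expanded before $EXTRA_ARGS, so with FastChat's argparse-style parsing (where the last occurrence of a repeated flag typically wins) a user-supplied flag in EXTRA_ARGS can still override a controller-generated one. Previously the controller folded EXTRA_ARGS into its own generated argument string, which is the overlap this commit removes.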
40 changes: 17 additions & 23 deletions pkg/worker/runner.go
@@ -93,24 +93,16 @@ func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.
         return nil, fmt.Errorf("failed to get arcadia config with %w", err)
     }

-    extraAgrs := ""
-    for _, envItem := range runner.w.Spec.AdditionalEnvs {
-        if envItem.Name == "EXTRA_ARGS" {
-            extraAgrs = envItem.Value
-            break
-        }
-    }
-
     modelFileDir := fmt.Sprintf("%s/%s", defaultModelMountPath, model.Name)
     additionalEnvs := []corev1.EnvVar{}
-    extraArgs := fmt.Sprintf("--device %s %s", runner.Device().String(), extraAgrs)
+    systemArgs := fmt.Sprintf("--device %s", runner.Device().String())
     if runner.modelFileFromRemote {
         m := arcadiav1alpha1.Model{}
         if err := runner.c.Get(ctx, types.NamespacedName{Namespace: *model.Namespace, Name: model.Name}, &m); err != nil {
             return nil, err
         }
         if m.Spec.Revision != "" {
-            extraArgs += fmt.Sprintf(" --revision %s ", m.Spec.Revision)
+            systemArgs += fmt.Sprintf(" --revision %s ", m.Spec.Revision)
         }
         if m.Spec.ModelSource == modelSourceFromHugginfFace {
             modelFileDir = m.Spec.HuggingFaceRepo
@@ -139,7 +131,6 @@ func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.
             {Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s:%d", runner.w.Name+WokerCommonSuffix, runner.w.Namespace, arcadiav1alpha1.DefaultWorkerPort)},
             {Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller},
             {Name: "NUMBER_GPUS", Value: runner.NumberOfGPUs()},
-            {Name: "EXTRA_ARGS", Value: extraArgs},
         },
         Ports: []corev1.ContainerPort{
             {Name: "http", ContainerPort: arcadiav1alpha1.DefaultWorkerPort},
@@ -149,6 +140,7 @@ func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.
         },
         Resources: runner.w.Spec.Resources,
     }
+    additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "SYSTEM_ARGS", Value: systemArgs})

     container.Env = append(container.Env, additionalEnvs...)
     return container, nil
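
Before moving to the vLLM runner's hunks, here is the effect of the change above as a minimal, self-contained Go sketch (envVar and buildFastchatEnv are illustrative names, not from the repository): the controller composes only the flags it owns into SYSTEM_ARGS and no longer copies the user's EXTRA_ARGS, so the two streams of arguments can no longer clobber each other.

package main

import "fmt"

// envVar stands in for corev1.EnvVar in this sketch.
type envVar struct{ Name, Value string }

// buildFastchatEnv mirrors the new pattern: controller-owned flags go
// into SYSTEM_ARGS; EXTRA_ARGS is left entirely to the user's
// spec.additionalEnvs and is appended separately by start-worker.sh.
func buildFastchatEnv(device, revision string) []envVar {
    systemArgs := fmt.Sprintf("--device %s", device)
    if revision != "" {
        systemArgs += fmt.Sprintf(" --revision %s", revision)
    }
    return []envVar{{Name: "SYSTEM_ARGS", Value: systemArgs}}
}

func main() {
    for _, e := range buildFastchatEnv("cuda", "main") {
        fmt.Printf("%s=%q\n", e.Name, e.Value) // SYSTEM_ARGS="--device cuda --revision main"
    }
}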
@@ -193,12 +185,12 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
         return nil, fmt.Errorf("failed to get arcadia config with %w", err)
     }

-    extraAgrs := ""
+    systemArgs := ""
     additionalEnvs := []corev1.EnvVar{}

     // configure ray cluster
     resources := runner.w.Spec.Resources
-    gpus := runner.NumberOfGPUs()
+    gpuEnvExist := false
     // default ray cluster which can only utilize gpus on single nodes
     rayCluster := config.DefaultRayCluster()
     for _, envItem := range runner.w.Spec.AdditionalEnvs {
@@ -223,12 +215,10 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
         // By default, gpu_memory_utilization will be 0.9
         if envItem.Name == "GPU_MEMORY_UTILIZATION" {
             gpuMemoryUtilization, _ := strconv.ParseFloat(envItem.Value, 64)
-            extraAgrs += fmt.Sprintf(" --gpu_memory_utilization %f", gpuMemoryUtilization)
+            systemArgs += fmt.Sprintf(" --gpu_memory_utilization %f", gpuMemoryUtilization)
         }

-        // extra arguments to run llm
-        if envItem.Name == "EXTRA_ARGS" {
-            extraAgrs = envItem.Value
+        if envItem.Name == "NUMBER_GPUS" {
+            gpuEnvExist = true
         }
     }
     klog.V(5).Infof("run worker with raycluster:\n %s", rayCluster.String())
@@ -245,18 +235,16 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
         Name:  "PYTHON_VERSION",
         Value: rayCluster.GetPythonVersion(),
     })
-    // Set gpu number to the number of GPUs in the worker's resource
-    additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "NUMBER_GPUS", Value: gpus})

     modelFileDir := fmt.Sprintf("%s/%s", defaultModelMountPath, model.Name)
-    extraAgrs = fmt.Sprintf("%s --trust-remote-code", extraAgrs)
+    systemArgs = fmt.Sprintf("%s --trust-remote-code", systemArgs)
     if runner.modelFileFromRemote {
         m := arcadiav1alpha1.Model{}
         if err := runner.c.Get(ctx, types.NamespacedName{Namespace: *model.Namespace, Name: model.Name}, &m); err != nil {
             return nil, err
         }
         if m.Spec.Revision != "" {
-            extraAgrs += fmt.Sprintf(" --revision %s", m.Spec.Revision)
+            systemArgs += fmt.Sprintf(" --revision %s", m.Spec.Revision)
         }
         if m.Spec.ModelSource == modelSourceFromHugginfFace {
             modelFileDir = m.Spec.HuggingFaceRepo
@@ -283,7 +271,6 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
             {Name: "FASTCHAT_MODEL_NAME", Value: model.Name},
             {Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s:%d", runner.w.Name+WokerCommonSuffix, runner.w.Namespace, arcadiav1alpha1.DefaultWorkerPort)},
             {Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller},
-            {Name: "EXTRA_ARGS", Value: extraAgrs},
         },
         Ports: []corev1.ContainerPort{
             {Name: "http", ContainerPort: arcadiav1alpha1.DefaultWorkerPort},
@@ -295,6 +282,13 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
         },
         Resources: resources,
     }
+    if !gpuEnvExist {
+        // if env doesn't exist, set gpu number to the number of GPUs in the worker's resource
+        additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "NUMBER_GPUS", Value: runner.NumberOfGPUs()})
+    }
+
+    additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "SYSTEM_ARGS", Value: systemArgs})
+
     container.Env = append(container.Env, additionalEnvs...)
     return container, nil
 }
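
The NUMBER_GPUS change deserves a closer look. Here is a sketch of the fallback logic under the same illustrative names as above (resolveGPUEnv is hypothetical; the real logic is inline in Build): a NUMBER_GPUS the user provides via spec.additionalEnvs now takes precedence, and the value derived from the worker's resource limits is used only when the user set nothing.

package main

import "fmt"

type envVar struct{ Name, Value string }

// resolveGPUEnv returns the env vars the controller should add:
// NUMBER_GPUS is only defaulted from the resource limits when the
// user did not supply it via additionalEnvs.
func resolveGPUEnv(userEnvs []envVar, gpusFromResources string) []envVar {
    gpuEnvExist := false
    for _, e := range userEnvs {
        if e.Name == "NUMBER_GPUS" {
            gpuEnvExist = true
        }
    }
    added := []envVar{}
    if !gpuEnvExist {
        // Fall back to the GPU count declared in resources.limits.
        added = append(added, envVar{Name: "NUMBER_GPUS", Value: gpusFromResources})
    }
    return added
}

func main() {
    fmt.Println(resolveGPUEnv(nil, "2"))                            // [{NUMBER_GPUS 2}]
    fmt.Println(resolveGPUEnv([]envVar{{"NUMBER_GPUS", "4"}}, "2")) // []
}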
