Skip to content

Commit

Permalink
Use ray start block in Pod's entrypoint(#77)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffwan committed Jan 5, 2022
1 parent b685822 commit 96d4b33
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 18 deletions.
2 changes: 2 additions & 0 deletions examples/bytedance/ray-cluster.complete.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ spec:
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
spec:
Expand Down Expand Up @@ -87,6 +88,7 @@ spec:
rayStartParams:
redis-password: 'LetMeInRay'
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
metadata:
Expand Down
8 changes: 3 additions & 5 deletions examples/bytedance/ray-cluster.heterogeneous.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,14 @@ spec:
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
spec:
# initContainers: # to avoid worker crashing before head service is created
containers:
- name: ray-head
image: hub.byted.org/kuberay/ray:1.6.0
# you can have any command and args here to run your code.
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: ["sleep"]
args:
- 'infinity'
env:
- name: MY_POD_IP
valueFrom:
Expand Down Expand Up @@ -67,6 +63,7 @@ spec:
rayStartParams:
redis-password: 'LetMeInRay'
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
spec:
Expand Down Expand Up @@ -134,6 +131,7 @@ spec:
rayStartParams:
redis-password: 'LetMeInRay'
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
spec:
Expand Down
1 change: 1 addition & 0 deletions examples/bytedance/ray-cluster.mini.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ spec:
# webui_host: "10.1.2.60"
dashboard-host: '0.0.0.0'
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
spec:
Expand Down
27 changes: 22 additions & 5 deletions ray-operator/controllers/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,13 @@ func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayN
Spec: podTemplateSpec.Spec,
}
index := getRayContainerIndex(pod)
cont := concatenateContainerCommand(rayNodeType, rayStartParams)

addEmptyDir(&pod.Spec.Containers[index], &pod)
cleanupInvalidVolumeMounts(&pod.Spec.Containers[index], &pod)
if len(pod.Spec.InitContainers) > index {
cleanupInvalidVolumeMounts(&pod.Spec.InitContainers[index], &pod)
}

// saving temporarily the old command and args
var cmd, args string
if len(pod.Spec.Containers[index].Command) > 0 {
cmd = convertCmdToString(pod.Spec.Containers[index].Command)
Expand All @@ -89,17 +87,25 @@ func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayN
cmd += convertCmdToString(pod.Spec.Containers[index].Args)
}
if !strings.Contains(cmd, "ray start") {
cont := concatenateContainerCommand(rayNodeType, rayStartParams)
// replacing the old command
// replacing the old command
pod.Spec.Containers[index].Command = []string{"/bin/bash", "-c", "--"}
if cmd != "" {
// sleep infinity is used to keep the pod `running` after the last command exits, and not go into `completed` state
args = fmt.Sprintf("%s && %s && %s", cont, cmd, "sleep infinity")
args = fmt.Sprintf("%s && %s", cont, cmd)
} else {
args = fmt.Sprintf("%s && %s", cont, "sleep infinity")
args = cont
}

if !isRayStartWithBlock(rayStartParams) {
// sleep infinity is used to keep the pod `running` after the last command exits, and not go into `completed` state
args = args + " && sleep infinity"
}

pod.Spec.Containers[index].Args = []string{args}
}

for index := range pod.Spec.InitContainers {
setInitContainerEnvVars(&pod.Spec.InitContainers[index], svcName)
}
Expand All @@ -109,6 +115,13 @@ func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayN
return pod
}

func isRayStartWithBlock(rayStartParams map[string]string) bool {
if blockValue, exist := rayStartParams["block"]; exist {
return strings.ToLower(blockValue) == "true"
}
return false
}

func convertCmdToString(cmdArr []string) (cmd string) {
cmdAggr := new(bytes.Buffer)
for _, v := range cmdArr {
Expand Down Expand Up @@ -264,7 +277,11 @@ func concatenateContainerCommand(nodeType rayiov1alpha1.RayNodeType, rayStartPar
func convertParamMap(rayStartParams map[string]string) (s string) {
flags := new(bytes.Buffer)
for k, v := range rayStartParams {
fmt.Fprintf(flags, " --%s=%s ", k, v)
if strings.ToLower(v) == "true" {
fmt.Fprintf(flags, " --%s ", k)
} else {
fmt.Fprintf(flags, " --%s=%s ", k, v)
}
}
return flags.String()
}
Expand Down
31 changes: 23 additions & 8 deletions ray-operator/controllers/common/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package common
import (
"fmt"
"reflect"
"sort"
"strings"
"testing"

Expand Down Expand Up @@ -43,10 +44,8 @@ var instance = &rayiov1alpha1.RayCluster{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
corev1.Container{
Name: "ray-head",
Image: "rayproject/autoscaler",
Command: []string{"python"},
Args: []string{"/opt/code.py"},
Name: "ray-head",
Image: "rayproject/autoscaler",
Env: []corev1.EnvVar{
corev1.EnvVar{
Name: "MY_POD_IP",
Expand All @@ -72,6 +71,7 @@ var instance = &rayiov1alpha1.RayCluster{
"port": "6379",
"redis-password": "LetMeInRay",
"num-cpus": "1",
"block": "true",
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -84,10 +84,8 @@ var instance = &rayiov1alpha1.RayCluster{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
corev1.Container{
Name: "ray-worker",
Image: "rayproject/autoscaler",
Command: []string{"echo"},
Args: []string{"Hello Ray"},
Name: "ray-worker",
Image: "rayproject/autoscaler",
Env: []corev1.EnvVar{
corev1.EnvVar{
Name: "MY_POD_IP",
Expand Down Expand Up @@ -141,4 +139,21 @@ func TestBuildPod(t *testing.T) {
if !reflect.DeepEqual(expectedResult, actualResult) {
t.Fatalf("Expected `%v` but got `%v`", expectedResult, actualResult)
}

expectedCommandArg := splitAndSort("ulimit -n 65536; ray start --block --num-cpus=1 --address=raycluster-sample-head-svc:6379 --port=6379 --redis-password=LetMeInRay")
if !reflect.DeepEqual(expectedCommandArg, splitAndSort(pod.Spec.Containers[0].Args[0])) {
t.Fatalf("Expected `%v` but got `%v`", expectedCommandArg, pod.Spec.Containers[0].Args)
}
}

func splitAndSort(s string) []string {
strs := strings.Split(s, " ")
result := make([]string, 0, len(strs))
for _, s := range strs {
if len(s) > 0 {
result = append(result, s)
}
}
sort.Strings(result)
return result
}

0 comments on commit 96d4b33

Please sign in to comment.