From 376180540285d51587ce9a9b45685e0046bd2d37 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Thu, 29 Sep 2022 01:39:42 -0400 Subject: [PATCH 01/15] prototype --- .../config/ray-cluster.mini.yaml.template | 118 +++++++++++++ tests/framework/prototype.py | 162 ++++++++++++++++++ 2 files changed, 280 insertions(+) create mode 100644 tests/framework/config/ray-cluster.mini.yaml.template create mode 100644 tests/framework/prototype.py diff --git a/tests/framework/config/ray-cluster.mini.yaml.template b/tests/framework/config/ray-cluster.mini.yaml.template new file mode 100644 index 00000000000..188ac8f0690 --- /dev/null +++ b/tests/framework/config/ray-cluster.mini.yaml.template @@ -0,0 +1,118 @@ +apiVersion: ray.io/v1alpha1 +kind: RayCluster +metadata: + labels: + controller-tools.k8s.io: "1.0" + # An unique identifier for the head node and workers of this cluster. + name: raycluster-mini +spec: + rayVersion: '$ray_version' # should match the Ray version in the image of the containers + ######################headGroupSpecs################################# + # head group template and specs, (perhaps 'group' is not needed in the name) + headGroupSpec: + # Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer' + serviceType: ClusterIP + # the pod replicas in this group typed head (assuming there could be more than 1 in the future) + replicas: 1 + # logical group name, for this called head-group, also can be functional + # pod type head or worker + # rayNodeType: head # Not needed since it is under the headgroup + # the following params are used to complete the ray start: ray start --head --block --redis-port=6379 ... + rayStartParams: + port: '6379' # should match headService targetPort + #include_webui: 'true' + redis-password: 'LetMeInRay' # Deprecated since Ray 1.11 due to GCS bootstrapping enabled + # webui_host: "10.1.2.60" + dashboard-host: '0.0.0.0' + num-cpus: '1' # can be auto-completed from the limits + node-ip-address: $$MY_POD_IP # auto-completed as the head pod IP + block: 'true' + #pod template + template: + metadata: + labels: + # custom labels. NOTE: do not define custom labels start with `raycluster.`, they may be used in controller. + # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + rayCluster: raycluster-compatibility-test + rayNodeType: head # will be injected if missing, must be head or wroker + groupName: headgroup # will be injected if missing + # annotations for pod + annotations: + key: value + spec: + containers: + - name: ray-head + image: rayproject/ray:1.9.0 + env: + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + ports: + - containerPort: 6379 + name: redis + - containerPort: 8265 # Ray dashboard + name: dashboard + - containerPort: 10001 + name: client + workerGroupSpecs: + - replicas: 1 + minReplicas: 1 + maxReplicas: 1 + groupName: small-group + # the following params are used to complete the ray start: ray start --block --node-ip-address= ... + rayStartParams: + redis-password: 'LetMeInRay' # Deprecated since Ray 1.11 due to GCS bootstrapping enabled + node-ip-address: $$MY_POD_IP + block: 'true' + #pod template + template: + metadata: + labels: + rayCluster: raycluster-compatibility-test + rayNodeType: worker # will be injected if missing + groupName: small-group # will be injected if missing + # annotations for pod + annotations: + key: value + spec: + initContainers: # to avoid worker crashing before head service is created + - name: init-myservice + image: busybox:1.28 + # Change the cluster postfix if you don't have a default setting + command: ['sh', '-c', "until nslookup $$RAY_IP.$$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"] + containers: + - name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' + image: rayproject/ray:1.9.0 + # environment variables to set in the container.Optional. + # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/ + env: + - name: TYPE + value: "worker" + - name: RAY_DISABLE_DOCKER_CPU_WARNING + value: "1" + - name: CPU_REQUEST + valueFrom: + resourceFieldRef: + containerName: machine-learning + resource: requests.cpu + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + ports: + - containerPort: 80 + # use volumeMounts.Optional. + # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ + volumeMounts: + - mountPath: /var/log + name: log-volume + # use volumes + # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ + volumes: + - name: log-volume + emptyDir: {} diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py new file mode 100644 index 00000000000..0ee5e3e167b --- /dev/null +++ b/tests/framework/prototype.py @@ -0,0 +1,162 @@ +import yaml +import copy +from typing import Optional +from kubernetes import client, config +import os +import time +import docker + +class DeltaSet: + def __init__(self, path, candidates): + self.path = path + self.candidates = candidates + + def iterate(self): + for candidate in self.candidates: + yield candidate + + # return value: (1) steps (2) None => this path does not exist + def find_path(self, cr) -> Optional[list[str]]: + steps = self.path.split('.') + return steps + +def search_path(cr, steps): + curr = cr + for step in steps: + if step.isnumeric(): + int_step = int(step) + if int_step >= len(curr) or int_step < 0: + return None + curr = curr[int(step)] + elif step in curr: + curr = curr[step] + else: + return None + return curr + +class Mutator: + def __init__(self, baseCR, deltaSets: list[DeltaSet]): + self.baseCR = baseCR + self.deltaSets = deltaSets + def mutate(self): + pass + def apply_delta(self, base, delta, steps): + root = copy.deepcopy(base) + curr = search_path(root, steps[:-1]) + curr[steps[-1]] = delta + return root + +class Rule: + def __init__(self): + pass + def trigger_condition(self, cr) -> bool: + pass + def assertRule(self, cr): + pass + +class RuleSet: + def __init__(self, rules: list[Rule]): + self.rules = rules + def checkRuleSet(self, cr, namespace): + for rule in self.rules: + if rule.trigger_condition(cr): + rule.assertRule(cr, namespace) + +class CREvent: + def __init__(self, cr, cmd, ruleSets: list[RuleSet], timeout, namespace): + self.ruleSets = ruleSets + self.cmd = cmd + self.timeout = timeout + self.cr = cr + self.namespace = namespace + def trigger(self): + self.exec() + self.wait() + self.checkRuleSets() + def exec(self): + os.system(self.cmd) + def wait(self): + time.sleep(self.timeout) + def checkRuleSets(self): + for rs in self.ruleSets: + rs.checkRuleSet(self.cr, self.namespace) + +class SimpleMutator(Mutator): + def mutate(self): + for deltaSet in self.deltaSets: + steps = deltaSet.find_path(self.baseCR) + if steps: + for delta in deltaSet.iterate(): + yield self.apply_delta(self.baseCR, delta, steps) + else: + print("The path does not exist") + +class HeadPodNameRule(Rule): + def trigger_condition(self, cr) -> bool: + steps = "spec.headGroupSpec".split('.') + print("trigger_condition: " + (search_path(cr, steps) != None)) + return (search_path(cr, steps) != None) + + def assertRule(self, cr, namespace): + expected_val = search_path(cr, "spec.headGroupSpec.template.spec.containers.0.name".split('.')) + config.load_kube_config() + v1 = client.CoreV1Api() + headpods = v1.list_namespaced_pod(namespace = namespace, label_selector='rayNodeType={}'.format('head')) + print("HeadPodNameRule: {} {}".format(expected_val, headpods.items[0].spec.containers[0].name)) + assert(headpods.items[0].spec.containers[0].name == expected_val) + +def delete_kind_cluster(): + os.system("kind delete cluster") + +def create_kind_cluster(): + os.system("kind create cluster") + os.system("kubectl wait --for=condition=ready pod -n kube-system --all --timeout=900s") + +def install_crd(): + KUBERAY_VERSION = "v0.3.0" + os.system("kubectl create -k \"github.com/ray-project/kuberay/manifests/cluster-scope-resources?" + + "ref={}&timeout=90s\"".format(KUBERAY_VERSION)) + +def download_images(images): + client = docker.from_env() + for image in images: + client.images.pull(image) + client.close() + +def install_kuberay_operator(): + KUBERAY_VERSION = "v0.3.0" + os.system("kubectl apply -k \"github.com/ray-project/kuberay/manifests/base?" + + "ref={}&timeout=90s\"".format(KUBERAY_VERSION)) + time.sleep(60) + +def kind_load_images(images): + for image in images: + os.system('kind load docker-image {}'.format(image)) + +if __name__ == '__main__': + template_name = 'config/ray-cluster.mini.yaml.template' + namespace = 'default' + with open(template_name) as base_yaml: + baseCR = yaml.load(base_yaml, Loader=yaml.FullLoader) + ds = DeltaSet("spec.headGroupSpec.template.spec.containers.0.name", ['ray-head-1', 'ray-head-2', 'ray-head-3']) + rs = RuleSet([HeadPodNameRule()]) + mut = SimpleMutator(baseCR, [ds]) + images = ['rayproject/ray:1.9.0', 'kuberay/operator:v0.3.0', 'kuberay/apiserver:v0.3.0'] + + for cr in mut.mutate(): + delete_kind_cluster() + create_kind_cluster() + install_crd() + download_images(images) + kind_load_images(images) + install_kuberay_operator() + f = open('tmp.yaml', 'w') + f.write(yaml.dump(cr)) + f.close() + addEvent = CREvent(cr, "kubectl apply -f tmp.yaml", [rs], 90, namespace) + addEvent.trigger() + + + + + From c4c3a237e30a6c57665d7181256077a7d5cec18a Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Thu, 29 Sep 2022 17:35:46 -0400 Subject: [PATCH 02/15] update --- .../config/ray-cluster.mini.yaml.template | 4 +- tests/framework/prototype.py | 158 +++++++++++------- 2 files changed, 104 insertions(+), 58 deletions(-) diff --git a/tests/framework/config/ray-cluster.mini.yaml.template b/tests/framework/config/ray-cluster.mini.yaml.template index 188ac8f0690..b50074810d3 100644 --- a/tests/framework/config/ray-cluster.mini.yaml.template +++ b/tests/framework/config/ray-cluster.mini.yaml.template @@ -42,7 +42,7 @@ spec: spec: containers: - name: ray-head - image: rayproject/ray:1.9.0 + image: rayproject/ray:2.0.0 env: - name: MY_POD_IP valueFrom: @@ -83,7 +83,7 @@ spec: command: ['sh', '-c', "until nslookup $$RAY_IP.$$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"] containers: - name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' - image: rayproject/ray:1.9.0 + image: rayproject/ray:2.0.0 # environment variables to set in the container.Optional. # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/ env: diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 0ee5e3e167b..48c608aa6f8 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -1,25 +1,12 @@ import yaml import copy from typing import Optional -from kubernetes import client, config +from kubernetes import client, config, utils import os import time import docker -class DeltaSet: - def __init__(self, path, candidates): - self.path = path - self.candidates = candidates - - def iterate(self): - for candidate in self.candidates: - yield candidate - - # return value: (1) steps (2) None => this path does not exist - def find_path(self, cr) -> Optional[list[str]]: - steps = self.path.split('.') - return steps - +# Utility functions def search_path(cr, steps): curr = cr for step in steps: @@ -34,26 +21,95 @@ def search_path(cr, steps): return None return curr +def yaml_to_file(cr_yaml, fn): + f = open(fn, 'w') + f.write(yaml.dump(cr_yaml)) + f.close() +''' +Functions for cluster preparation. Typical workflow: + Delete KinD cluster -> Create KinD cluster -> Install CRD -> Download Images (from DockerHub) -> + Load images into KinD cluster -> Install KubeRay operator +''' +def delete_kind_cluster(): + os.system("kind delete cluster") + +def create_kind_cluster(): + os.system("kind create cluster") + os.system("kubectl wait --for=condition=ready pod -n kube-system --all --timeout=900s") + +def install_crd(): + KUBERAY_VERSION = "v0.3.0" + os.system("kubectl create -k \"github.com/ray-project/kuberay/manifests/cluster-scope-resources?" + + "ref={}&timeout=90s\"".format(KUBERAY_VERSION)) + +def download_images(images): + client = docker.from_env() + for image in images: + client.images.pull(image) + client.close() + +def kind_load_images(images): + for image in images: + os.system('kind load docker-image {}'.format(image)) + +def install_kuberay_operator(): + KUBERAY_VERSION = "v0.3.0" + os.system("kubectl apply -k \"github.com/ray-project/kuberay/manifests/base?" + + "ref={}&timeout=90s\"".format(KUBERAY_VERSION)) + time.sleep(60) + +''' +Configuration Test Framework Abstractions: (1) DeltaSet (2) Mutator (3) Rule (4) RuleSet (5) CREvent +''' + +# DeltaSet: Use `path` to specify the field that wants to mutate, and `candidates` is a list of candidate +# value for the field. +# Example: DeltaSet("spec.headGroupSpec.template.spec.containers.0.name", ['ray-head-1', 'ray-head-2', 'ray-head-3']) +class DeltaSet: + def __init__(self, path, candidates): + self.path = path + self.candidates = candidates + + def iterate(self): + for candidate in self.candidates: + yield candidate + + # return value: (1) steps (2) None => this path does not exist + def find_path(self, cr) -> Optional[list[str]]: + steps = self.path.split('.') + return steps + +# Mutator: Mutator will start to mutate from `baseCR`. `deltaSets` is a list of DeltaSets, and each DeltaSet +# specifies a field that wants to mutate with multiple candidate values. +# Example: "SimpleMutator" class Mutator: def __init__(self, baseCR, deltaSets: list[DeltaSet]): self.baseCR = baseCR self.deltaSets = deltaSets + # You need to define your mutate() function by inheriting `Mutator`. It should return a new CR. def mutate(self): pass + # Apply delta to the base (custom resource). def apply_delta(self, base, delta, steps): root = copy.deepcopy(base) curr = search_path(root, steps[:-1]) curr[steps[-1]] = delta return root +# Rule: Rule is used to check whether the actual cluster state is the same as our expectation after a CREvent. +# We can infer the expected state by CR YAML file, and get the actual cluster state by Kubernetes API. +# Example: "HeadPodNameRule" class Rule: def __init__(self): pass - def trigger_condition(self, cr) -> bool: - pass - def assertRule(self, cr): + # The rule will only be checked when `trigger_condition` is true. For example, we will only check + # "HeadPodNameRule" when "spec.headGroupSpec" is defined in CR YAML file. + def trigger_condition(self, cr=None) -> bool: + return True + def assertRule(self, cr=None, namespace='default'): pass +# RuleSet: A set of Rule class RuleSet: def __init__(self, rules: list[Rule]): self.rules = rules @@ -62,6 +118,14 @@ def checkRuleSet(self, cr, namespace): if rule.trigger_condition(cr): rule.assertRule(cr, namespace) +# CREvent: Custom Resource Event can be mainly divided into 3 categories. +# (1) Add (create) CR (2) Update CR (3) Delete CR +# +# The member functions integrate together in `trigger()`. +# [Step1] exec(): Execute a command to trigger the CREvent. For example, create a CR by a +# `kubectl apply` command. +# [Step2] wait(): Wait for the system to converge. +# [Step3] checkRuleSets(): When the system converges, check all registered RuleSets. class CREvent: def __init__(self, cr, cmd, ruleSets: list[RuleSet], timeout, namespace): self.ruleSets = ruleSets @@ -81,6 +145,9 @@ def checkRuleSets(self): for rs in self.ruleSets: rs.checkRuleSet(self.cr, self.namespace) +''' +My implementations +''' class SimpleMutator(Mutator): def mutate(self): for deltaSet in self.deltaSets: @@ -94,44 +161,19 @@ def mutate(self): class HeadPodNameRule(Rule): def trigger_condition(self, cr) -> bool: steps = "spec.headGroupSpec".split('.') - print("trigger_condition: " + (search_path(cr, steps) != None)) return (search_path(cr, steps) != None) def assertRule(self, cr, namespace): expected_val = search_path(cr, "spec.headGroupSpec.template.spec.containers.0.name".split('.')) - config.load_kube_config() - v1 = client.CoreV1Api() - headpods = v1.list_namespaced_pod(namespace = namespace, label_selector='rayNodeType={}'.format('head')) - print("HeadPodNameRule: {} {}".format(expected_val, headpods.items[0].spec.containers[0].name)) + headpods = client.CoreV1Api().list_namespaced_pod(namespace = namespace, label_selector='rayNodeType={}'.format('head')) assert(headpods.items[0].spec.containers[0].name == expected_val) -def delete_kind_cluster(): - os.system("kind delete cluster") - -def create_kind_cluster(): - os.system("kind create cluster") - os.system("kubectl wait --for=condition=ready pod -n kube-system --all --timeout=900s") - -def install_crd(): - KUBERAY_VERSION = "v0.3.0" - os.system("kubectl create -k \"github.com/ray-project/kuberay/manifests/cluster-scope-resources?" + - "ref={}&timeout=90s\"".format(KUBERAY_VERSION)) - -def download_images(images): - client = docker.from_env() - for image in images: - client.images.pull(image) - client.close() - -def install_kuberay_operator(): - KUBERAY_VERSION = "v0.3.0" - os.system("kubectl apply -k \"github.com/ray-project/kuberay/manifests/base?" + - "ref={}&timeout=90s\"".format(KUBERAY_VERSION)) - time.sleep(60) - -def kind_load_images(images): - for image in images: - os.system('kind load docker-image {}'.format(image)) +class EasyJobRule(Rule): + def assertRule(self, cr=None, namespace='default'): + headpods = client.CoreV1Api().list_namespaced_pod(namespace = namespace, label_selector='rayNodeType={}'.format('head')) + headpodName = headpods.items[0].metadata.name + rtn = os.system("kubectl exec {} -- python -c \"import ray; ray.init(); print(ray.cluster_resources())\"".format(headpodName)) + assert(rtn == 0) if __name__ == '__main__': template_name = 'config/ray-cluster.mini.yaml.template' @@ -139,21 +181,25 @@ def kind_load_images(images): with open(template_name) as base_yaml: baseCR = yaml.load(base_yaml, Loader=yaml.FullLoader) ds = DeltaSet("spec.headGroupSpec.template.spec.containers.0.name", ['ray-head-1', 'ray-head-2', 'ray-head-3']) - rs = RuleSet([HeadPodNameRule()]) + rs = RuleSet([HeadPodNameRule(), EasyJobRule()]) mut = SimpleMutator(baseCR, [ds]) - images = ['rayproject/ray:1.9.0', 'kuberay/operator:v0.3.0', 'kuberay/apiserver:v0.3.0'] + images = ['rayproject/ray:2.0.0', 'kuberay/operator:v0.3.0', 'kuberay/apiserver:v0.3.0'] for cr in mut.mutate(): + # Convert CR object into YAML file + filename = "tmp.yaml" + yaml_to_file(cr, filename) + # Prepare for KinD cluster, CRD, images, and Kuberay operator. delete_kind_cluster() create_kind_cluster() install_crd() download_images(images) kind_load_images(images) install_kuberay_operator() - f = open('tmp.yaml', 'w') - f.write(yaml.dump(cr)) - f.close() - addEvent = CREvent(cr, "kubectl apply -f tmp.yaml", [rs], 90, namespace) + # Prepare for Python k8s client + config.load_kube_config() + # Trigger CREvent + addEvent = CREvent(cr, "kubectl apply -f {}".format(filename), [rs], 90, namespace) addEvent.trigger() From c615c02754535871d2b152b1c6fa5bae6bdff202 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Thu, 29 Sep 2022 20:03:35 -0400 Subject: [PATCH 03/15] unittest & smart wait function --- tests/framework/prototype.py | 71 ++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 48c608aa6f8..776694135ae 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -5,8 +5,13 @@ import os import time import docker +import logging +import unittest # Utility functions +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + def search_path(cr, steps): curr = cr for step in steps: @@ -175,6 +180,50 @@ def assertRule(self, cr=None, namespace='default'): rtn = os.system("kubectl exec {} -- python -c \"import ray; ray.init(); print(ray.cluster_resources())\"".format(headpodName)) assert(rtn == 0) +class AddCREvent(CREvent): + def wait(self): + def check_pod_running(pods) -> bool: + for pod in pods: + if pod.status.phase != 'Running': + return False + return True + start_time = time.time() + expected_head_pods = search_path(self.cr, "spec.headGroupSpec.replicas".split('.')) + expected_worker_pods = search_path(self.cr, "spec.workerGroupSpecs.0.replicas".split('.')) + # Wait until: + # (1) The number of head pods and worker pods are as expected. + # (2) All head pods and worker pods are "Running". + for _ in range(self.timeout): + headpods = client.CoreV1Api().list_namespaced_pod(namespace = self.namespace, label_selector='rayNodeType={}'.format('head')) + workerpods = client.CoreV1Api().list_namespaced_pod(namespace = self.namespace, label_selector='rayNodeType={}'.format('worker')) + if len(headpods.items) == expected_head_pods and len(workerpods.items) == expected_worker_pods and check_pod_running(headpods.items) and check_pod_running(workerpods.items): + logger.info("--- %s seconds ---" % (time.time() - start_time)) + return + time.sleep(1) + raise Exception("wait() timeout") + +# TestSuite +class GeneralTestCase(unittest.TestCase): + def __init__(self, methodName, images, crEvent): + super(GeneralTestCase, self).__init__(methodName) + self.crEvent = crEvent + self.images = images + + def runTest(self): + logging.info("Convert CR object into YAML file") + filename = "tmp.yaml" + yaml_to_file(self.crEvent.cr, filename) + + logging.info("Prepare KinD cluster") + delete_kind_cluster() + create_kind_cluster() + install_crd() + download_images(self.images) + kind_load_images(self.images) + install_kuberay_operator() + config.load_kube_config() + self.crEvent.trigger() + if __name__ == '__main__': template_name = 'config/ray-cluster.mini.yaml.template' namespace = 'default' @@ -184,24 +233,14 @@ def assertRule(self, cr=None, namespace='default'): rs = RuleSet([HeadPodNameRule(), EasyJobRule()]) mut = SimpleMutator(baseCR, [ds]) images = ['rayproject/ray:2.0.0', 'kuberay/operator:v0.3.0', 'kuberay/apiserver:v0.3.0'] + filename = 'tmp.yaml' + test_cases = unittest.TestSuite() for cr in mut.mutate(): - # Convert CR object into YAML file - filename = "tmp.yaml" - yaml_to_file(cr, filename) - # Prepare for KinD cluster, CRD, images, and Kuberay operator. - delete_kind_cluster() - create_kind_cluster() - install_crd() - download_images(images) - kind_load_images(images) - install_kuberay_operator() - # Prepare for Python k8s client - config.load_kube_config() - # Trigger CREvent - addEvent = CREvent(cr, "kubectl apply -f {}".format(filename), [rs], 90, namespace) - addEvent.trigger() - + addEvent = AddCREvent(cr, "kubectl apply -f {}".format(filename), [rs], 90, namespace) + test_cases.addTest(GeneralTestCase('runTest', images, addEvent)) + runner=unittest.TextTestRunner() + runner.run(test_cases) From 397d3064928f6db050bcbb28a09af4c504feb753 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Fri, 30 Sep 2022 20:29:18 -0400 Subject: [PATCH 04/15] raise NotImplementedError --- tests/framework/prototype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 776694135ae..069a2289733 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -112,7 +112,7 @@ def __init__(self): def trigger_condition(self, cr=None) -> bool: return True def assertRule(self, cr=None, namespace='default'): - pass + raise NotImplementedError # RuleSet: A set of Rule class RuleSet: From 1164a24023296814328f9d0e0dd9ecf68ec12381 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Fri, 30 Sep 2022 21:21:52 -0400 Subject: [PATCH 05/15] update --- tests/framework/prototype.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 069a2289733..32ac4d6cf14 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -61,7 +61,6 @@ def install_kuberay_operator(): KUBERAY_VERSION = "v0.3.0" os.system("kubectl apply -k \"github.com/ray-project/kuberay/manifests/base?" + "ref={}&timeout=90s\"".format(KUBERAY_VERSION)) - time.sleep(60) ''' Configuration Test Framework Abstractions: (1) DeltaSet (2) Mutator (3) Rule (4) RuleSet (5) CREvent @@ -132,18 +131,17 @@ def checkRuleSet(self, cr, namespace): # [Step2] wait(): Wait for the system to converge. # [Step3] checkRuleSets(): When the system converges, check all registered RuleSets. class CREvent: - def __init__(self, cr, cmd, ruleSets: list[RuleSet], timeout, namespace): + def __init__(self, cr, ruleSets: list[RuleSet], timeout, namespace): self.ruleSets = ruleSets - self.cmd = cmd self.timeout = timeout - self.cr = cr self.namespace = namespace + self.cr = cr def trigger(self): self.exec() self.wait() self.checkRuleSets() def exec(self): - os.system(self.cmd) + raise NotImplementedError def wait(self): time.sleep(self.timeout) def checkRuleSets(self): @@ -170,17 +168,23 @@ def trigger_condition(self, cr) -> bool: def assertRule(self, cr, namespace): expected_val = search_path(cr, "spec.headGroupSpec.template.spec.containers.0.name".split('.')) - headpods = client.CoreV1Api().list_namespaced_pod(namespace = namespace, label_selector='rayNodeType={}'.format('head')) + headpods = client.CoreV1Api().list_namespaced_pod(namespace = namespace, label_selector='rayNodeType=head') assert(headpods.items[0].spec.containers[0].name == expected_val) class EasyJobRule(Rule): def assertRule(self, cr=None, namespace='default'): - headpods = client.CoreV1Api().list_namespaced_pod(namespace = namespace, label_selector='rayNodeType={}'.format('head')) + headpods = client.CoreV1Api().list_namespaced_pod(namespace = namespace, label_selector='rayNodeType=head') headpodName = headpods.items[0].metadata.name - rtn = os.system("kubectl exec {} -- python -c \"import ray; ray.init(); print(ray.cluster_resources())\"".format(headpodName)) + rtn = os.system("kubectl exec {} -- python -c \"import ray; ray.init(); print(ray.cluster_resources())\"" + .format(headpodName)) assert(rtn == 0) class AddCREvent(CREvent): + def exec(self): + client.CustomObjectsApi().create_namespaced_custom_object( + group = 'ray.io',version = 'v1alpha1', namespace = self.namespace, plural = 'rayclusters', body = self.cr, + pretty = 'true') + def wait(self): def check_pod_running(pods) -> bool: for pod in pods: @@ -194,9 +198,12 @@ def check_pod_running(pods) -> bool: # (1) The number of head pods and worker pods are as expected. # (2) All head pods and worker pods are "Running". for _ in range(self.timeout): - headpods = client.CoreV1Api().list_namespaced_pod(namespace = self.namespace, label_selector='rayNodeType={}'.format('head')) - workerpods = client.CoreV1Api().list_namespaced_pod(namespace = self.namespace, label_selector='rayNodeType={}'.format('worker')) - if len(headpods.items) == expected_head_pods and len(workerpods.items) == expected_worker_pods and check_pod_running(headpods.items) and check_pod_running(workerpods.items): + headpods = client.CoreV1Api().list_namespaced_pod(namespace = self.namespace, + label_selector='rayNodeType={}'.format('head')) + workerpods = client.CoreV1Api().list_namespaced_pod(namespace = self.namespace, + label_selector='rayNodeType={}'.format('worker')) + if (len(headpods.items) == expected_head_pods and len(workerpods.items) == expected_worker_pods + and check_pod_running(headpods.items) and check_pod_running(workerpods.items)): logger.info("--- %s seconds ---" % (time.time() - start_time)) return time.sleep(1) @@ -210,10 +217,6 @@ def __init__(self, methodName, images, crEvent): self.images = images def runTest(self): - logging.info("Convert CR object into YAML file") - filename = "tmp.yaml" - yaml_to_file(self.crEvent.cr, filename) - logging.info("Prepare KinD cluster") delete_kind_cluster() create_kind_cluster() @@ -233,11 +236,10 @@ def runTest(self): rs = RuleSet([HeadPodNameRule(), EasyJobRule()]) mut = SimpleMutator(baseCR, [ds]) images = ['rayproject/ray:2.0.0', 'kuberay/operator:v0.3.0', 'kuberay/apiserver:v0.3.0'] - filename = 'tmp.yaml' test_cases = unittest.TestSuite() for cr in mut.mutate(): - addEvent = AddCREvent(cr, "kubectl apply -f {}".format(filename), [rs], 90, namespace) + addEvent = AddCREvent(cr, [rs], 90, namespace) test_cases.addTest(GeneralTestCase('runTest', images, addEvent)) runner=unittest.TextTestRunner() runner.run(test_cases) From a8b722ccce97933dcea4dc8f0f2cbba5d391aa7b Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Fri, 30 Sep 2022 21:23:30 -0400 Subject: [PATCH 06/15] delete yaml_to_file --- tests/framework/prototype.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 32ac4d6cf14..0a35c631770 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -26,10 +26,6 @@ def search_path(cr, steps): return None return curr -def yaml_to_file(cr_yaml, fn): - f = open(fn, 'w') - f.write(yaml.dump(cr_yaml)) - f.close() ''' Functions for cluster preparation. Typical workflow: Delete KinD cluster -> Create KinD cluster -> Install CRD -> Download Images (from DockerHub) -> From 81bdc8f36e1cf24be3bdf7660830fe7ecf1b1c35 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Sat, 1 Oct 2022 00:36:46 -0400 Subject: [PATCH 07/15] update --- tests/framework/prototype.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 0a35c631770..2a8a022749f 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -175,7 +175,7 @@ def assertRule(self, cr=None, namespace='default'): .format(headpodName)) assert(rtn == 0) -class AddCREvent(CREvent): +class RayClusterAddCREvent(CREvent): def exec(self): client.CustomObjectsApi().create_namespaced_custom_object( group = 'ray.io',version = 'v1alpha1', namespace = self.namespace, plural = 'rayclusters', body = self.cr, @@ -235,7 +235,7 @@ def runTest(self): test_cases = unittest.TestSuite() for cr in mut.mutate(): - addEvent = AddCREvent(cr, [rs], 90, namespace) + addEvent = RayClusterAddCREvent(cr, [rs], 90, namespace) test_cases.addTest(GeneralTestCase('runTest', images, addEvent)) runner=unittest.TextTestRunner() runner.run(test_cases) From b2423f9707db0b0203e881e85804e2f76613955f Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Mon, 3 Oct 2022 13:18:35 -0400 Subject: [PATCH 08/15] jsonpatch support --- tests/framework/prototype.py | 76 ++++++++++++------------------------ 1 file changed, 25 insertions(+), 51 deletions(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 2a8a022749f..ce284e657bc 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -1,5 +1,4 @@ import yaml -import copy from typing import Optional from kubernetes import client, config, utils import os @@ -7,6 +6,7 @@ import docker import logging import unittest +import jsonpatch # Utility functions logger = logging.getLogger(__name__) @@ -31,27 +31,32 @@ def search_path(cr, steps): Delete KinD cluster -> Create KinD cluster -> Install CRD -> Download Images (from DockerHub) -> Load images into KinD cluster -> Install KubeRay operator ''' -def delete_kind_cluster(): +def delete_kind_cluster() -> None: + """Delete a KinD cluster""" os.system("kind delete cluster") def create_kind_cluster(): + """Create a KinD cluster""" os.system("kind create cluster") os.system("kubectl wait --for=condition=ready pod -n kube-system --all --timeout=900s") def install_crd(): + """Install Custom Resource Definition (CRD)""" KUBERAY_VERSION = "v0.3.0" os.system("kubectl create -k \"github.com/ray-project/kuberay/manifests/cluster-scope-resources?" + "ref={}&timeout=90s\"".format(KUBERAY_VERSION)) def download_images(images): - client = docker.from_env() + """Download Docker images from DockerHub""" + docker_client = docker.from_env() for image in images: - client.images.pull(image) - client.close() + docker_client.images.pull(image) + docker_client.close() def kind_load_images(images): + """Load downloaded images into KinD cluster""" for image in images: - os.system('kind load docker-image {}'.format(image)) + os.system(f'kind load docker-image {image}') def install_kuberay_operator(): KUBERAY_VERSION = "v0.3.0" @@ -62,39 +67,16 @@ def install_kuberay_operator(): Configuration Test Framework Abstractions: (1) DeltaSet (2) Mutator (3) Rule (4) RuleSet (5) CREvent ''' -# DeltaSet: Use `path` to specify the field that wants to mutate, and `candidates` is a list of candidate -# value for the field. -# Example: DeltaSet("spec.headGroupSpec.template.spec.containers.0.name", ['ray-head-1', 'ray-head-2', 'ray-head-3']) -class DeltaSet: - def __init__(self, path, candidates): - self.path = path - self.candidates = candidates - - def iterate(self): - for candidate in self.candidates: - yield candidate - - # return value: (1) steps (2) None => this path does not exist - def find_path(self, cr) -> Optional[list[str]]: - steps = self.path.split('.') - return steps - # Mutator: Mutator will start to mutate from `baseCR`. `deltaSets` is a list of DeltaSets, and each DeltaSet # specifies a field that wants to mutate with multiple candidate values. -# Example: "SimpleMutator" class Mutator: - def __init__(self, baseCR, deltaSets: list[DeltaSet]): + def __init__(self, baseCR, patch_list: list[jsonpatch.JsonPatch]): self.baseCR = baseCR - self.deltaSets = deltaSets - # You need to define your mutate() function by inheriting `Mutator`. It should return a new CR. + self.patch_list = patch_list + # Generate a new cr by applying the json patch to `cr`. def mutate(self): - pass - # Apply delta to the base (custom resource). - def apply_delta(self, base, delta, steps): - root = copy.deepcopy(base) - curr = search_path(root, steps[:-1]) - curr[steps[-1]] = delta - return root + for patch in self.patch_list: + yield patch.apply(self.baseCR) # Rule: Rule is used to check whether the actual cluster state is the same as our expectation after a CREvent. # We can infer the expected state by CR YAML file, and get the actual cluster state by Kubernetes API. @@ -147,16 +129,6 @@ def checkRuleSets(self): ''' My implementations ''' -class SimpleMutator(Mutator): - def mutate(self): - for deltaSet in self.deltaSets: - steps = deltaSet.find_path(self.baseCR) - if steps: - for delta in deltaSet.iterate(): - yield self.apply_delta(self.baseCR, delta, steps) - else: - print("The path does not exist") - class HeadPodNameRule(Rule): def trigger_condition(self, cr) -> bool: steps = "spec.headGroupSpec".split('.') @@ -228,9 +200,15 @@ def runTest(self): namespace = 'default' with open(template_name) as base_yaml: baseCR = yaml.load(base_yaml, Loader=yaml.FullLoader) - ds = DeltaSet("spec.headGroupSpec.template.spec.containers.0.name", ['ray-head-1', 'ray-head-2', 'ray-head-3']) + + patch_list = [ + jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-1'}]), + jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-2'}]), + jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-3'}]) + ] + rs = RuleSet([HeadPodNameRule(), EasyJobRule()]) - mut = SimpleMutator(baseCR, [ds]) + mut = Mutator(baseCR, patch_list) images = ['rayproject/ray:2.0.0', 'kuberay/operator:v0.3.0', 'kuberay/apiserver:v0.3.0'] test_cases = unittest.TestSuite() @@ -238,8 +216,4 @@ def runTest(self): addEvent = RayClusterAddCREvent(cr, [rs], 90, namespace) test_cases.addTest(GeneralTestCase('runTest', images, addEvent)) runner=unittest.TextTestRunner() - runner.run(test_cases) - - - - + runner.run(test_cases) \ No newline at end of file From 2b9919e96d0d738ef6fb1cd66dc8ce8530a36a3b Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Mon, 3 Oct 2022 18:04:42 -0400 Subject: [PATCH 09/15] reproduce some known config bugs --- .../config/ray-cluster.mini.yaml.template | 7 ++++ tests/framework/prototype.py | 38 +++++++++++++++---- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/tests/framework/config/ray-cluster.mini.yaml.template b/tests/framework/config/ray-cluster.mini.yaml.template index b50074810d3..ff4075b50e2 100644 --- a/tests/framework/config/ray-cluster.mini.yaml.template +++ b/tests/framework/config/ray-cluster.mini.yaml.template @@ -55,6 +55,13 @@ spec: name: dashboard - containerPort: 10001 name: client + resources: + limits: + cpu: 1 + memory: "1G" + requests: + cpu: 1 + memory: "512Mi" workerGroupSpecs: - replicas: 1 minReplicas: 1 diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index ce284e657bc..ebe237c423c 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -88,7 +88,7 @@ def __init__(self): # "HeadPodNameRule" when "spec.headGroupSpec" is defined in CR YAML file. def trigger_condition(self, cr=None) -> bool: return True - def assertRule(self, cr=None, namespace='default'): + def assertRule(self, cr=None, cr_namespace='default'): raise NotImplementedError # RuleSet: A set of Rule @@ -134,11 +134,21 @@ def trigger_condition(self, cr) -> bool: steps = "spec.headGroupSpec".split('.') return (search_path(cr, steps) != None) - def assertRule(self, cr, namespace): + def assertRule(self, cr=None, cr_namespace='default'): expected_val = search_path(cr, "spec.headGroupSpec.template.spec.containers.0.name".split('.')) - headpods = client.CoreV1Api().list_namespaced_pod(namespace = namespace, label_selector='rayNodeType=head') + headpods = client.CoreV1Api().list_namespaced_pod(namespace = cr_namespace, label_selector='rayNodeType=head') assert(headpods.items[0].spec.containers[0].name == expected_val) +class HeadSvcRule(Rule): + """The labels of the head pod and the selectors of the head service must match.""" + def assertRule(self, cr=None, cr_namespace='default'): + head_services = client.CoreV1Api().list_namespaced_service(namespace= cr_namespace, label_selector="ray.io/node-type=head") + assert(len(head_services.items) == 1) + selector_dict = head_services.items[0].spec.selector + selector = ','.join(map(lambda key: f"{key}={selector_dict[key]}", selector_dict)) + headpods = client.CoreV1Api().list_namespaced_pod(namespace =cr_namespace, label_selector=selector) + assert(len(headpods.items) == 1) + class EasyJobRule(Rule): def assertRule(self, cr=None, namespace='default'): headpods = client.CoreV1Api().list_namespaced_pod(namespace = namespace, label_selector='rayNodeType=head') @@ -203,17 +213,29 @@ def runTest(self): patch_list = [ jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-1'}]), - jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-2'}]), - jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-3'}]) + # Reproduce #587 + jsonpatch.JsonPatch([ + {'op': 'replace', 'path': '/spec/workerGroupSpecs/0/replicas', 'value': 2}, + {'op': 'add', 'path': '/spec/workerGroupSpecs/0/template/metadata/name', 'value': 'haha'} + ]), + # Reproduce #585 + jsonpatch.JsonPatch([{'op': 'add', 'path': '/spec/headGroupSpec/rayStartParams/object-manager-port', 'value': '12345'}]), + # Reproduce #572 #530 + jsonpatch.JsonPatch([{'op': 'add', 'path': '/spec/headGroupSpec/template/metadata/labels/app.kubernetes.io~1name', 'value': 'ray'}]), + # Reproduce #529 + jsonpatch.JsonPatch([ + {'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/resources/requests/memory', 'value': '256Mi'}, + {'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/resources/limits/memory', 'value': '512Mi'} + ]) ] - rs = RuleSet([HeadPodNameRule(), EasyJobRule()]) + rs = RuleSet([HeadPodNameRule(), EasyJobRule(), HeadSvcRule()]) mut = Mutator(baseCR, patch_list) images = ['rayproject/ray:2.0.0', 'kuberay/operator:v0.3.0', 'kuberay/apiserver:v0.3.0'] test_cases = unittest.TestSuite() - for cr in mut.mutate(): - addEvent = RayClusterAddCREvent(cr, [rs], 90, namespace) + for new_cr in mut.mutate(): + addEvent = RayClusterAddCREvent(new_cr, [rs], 90, namespace) test_cases.addTest(GeneralTestCase('runTest', images, addEvent)) runner=unittest.TextTestRunner() runner.run(test_cases) \ No newline at end of file From 6e96af84a8e354110fe34c19d2d554f37a1d10a5 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Mon, 3 Oct 2022 19:26:52 -0400 Subject: [PATCH 10/15] avoid KinD cluster recreation --- tests/framework/prototype.py | 58 +++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index ebe237c423c..7a200f2806e 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -63,6 +63,9 @@ def install_kuberay_operator(): os.system("kubectl apply -k \"github.com/ray-project/kuberay/manifests/base?" + "ref={}&timeout=90s\"".format(KUBERAY_VERSION)) +def check_cluster_exist(): + return os.system("kubectl cluster-info --context kind-kind") == 0 + ''' Configuration Test Framework Abstractions: (1) DeltaSet (2) Mutator (3) Rule (4) RuleSet (5) CREvent ''' @@ -150,6 +153,7 @@ def assertRule(self, cr=None, cr_namespace='default'): assert(len(headpods.items) == 1) class EasyJobRule(Rule): + """Submit a very simple Ray job to test the basic functionality of the Ray cluster.""" def assertRule(self, cr=None, namespace='default'): headpods = client.CoreV1Api().list_namespaced_pod(namespace = namespace, label_selector='rayNodeType=head') headpodName = headpods.items[0].metadata.name @@ -182,10 +186,28 @@ def check_pod_running(pods) -> bool: label_selector='rayNodeType={}'.format('worker')) if (len(headpods.items) == expected_head_pods and len(workerpods.items) == expected_worker_pods and check_pod_running(headpods.items) and check_pod_running(workerpods.items)): - logger.info("--- %s seconds ---" % (time.time() - start_time)) + logger.info("--- RayClusterAddCREvent %s seconds ---" % (time.time() - start_time)) + return + time.sleep(1) + raise Exception("RayClusterAddCREvent wait() timeout") + +class RayClusterDeleteCREvent(CREvent): + def exec(self): + client.CustomObjectsApi().delete_namespaced_custom_object( + group = 'ray.io', version = 'v1alpha1', namespace = self.namespace, plural = 'rayclusters', name = self.cr['metadata']['name']) + + def wait(self): + start_time = time.time() + for _ in range(self.timeout): + headpods = client.CoreV1Api().list_namespaced_pod(namespace = self.namespace, + label_selector='rayNodeType={}'.format('head')) + workerpods = client.CoreV1Api().list_namespaced_pod(namespace = self.namespace, + label_selector='rayNodeType={}'.format('worker')) + if (len(headpods.items) == 0 and len(workerpods.items) == 0): + logger.info("--- RayClusterDeleteCREvent %s seconds ---" % (time.time() - start_time)) return time.sleep(1) - raise Exception("wait() timeout") + raise Exception("RayClusterDeleteCREvent wait() timeout") # TestSuite class GeneralTestCase(unittest.TestCase): @@ -194,17 +216,30 @@ def __init__(self, methodName, images, crEvent): self.crEvent = crEvent self.images = images - def runTest(self): - logging.info("Prepare KinD cluster") + @classmethod + def setUpClass(cls): delete_kind_cluster() - create_kind_cluster() - install_crd() - download_images(self.images) - kind_load_images(self.images) - install_kuberay_operator() - config.load_kube_config() + + def setUp(self): + if not check_cluster_exist(): + create_kind_cluster() + install_crd() + download_images(self.images) + kind_load_images(self.images) + install_kuberay_operator() + config.load_kube_config() + + def runTest(self): self.crEvent.trigger() + def tearDown(self) -> None: + try: + delete_event = RayClusterDeleteCREvent(self.crEvent.cr, [], self.crEvent.timeout, self.crEvent.namespace) + delete_event.trigger() + except Exception as e: + logger.error(str(e)) + delete_kind_cluster() + if __name__ == '__main__': template_name = 'config/ray-cluster.mini.yaml.template' namespace = 'default' @@ -213,6 +248,7 @@ def runTest(self): patch_list = [ jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-1'}]), + jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-2'}]), # Reproduce #587 jsonpatch.JsonPatch([ {'op': 'replace', 'path': '/spec/workerGroupSpecs/0/replicas', 'value': 2}, @@ -238,4 +274,4 @@ def runTest(self): addEvent = RayClusterAddCREvent(new_cr, [rs], 90, namespace) test_cases.addTest(GeneralTestCase('runTest', images, addEvent)) runner=unittest.TextTestRunner() - runner.run(test_cases) \ No newline at end of file + runner.run(test_cases) From 6dc8642eb70054e3a77e834942da4447680b7f42 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Mon, 3 Oct 2022 20:42:35 -0400 Subject: [PATCH 11/15] reproduce #612 --- tests/framework/prototype.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 7a200f2806e..2452456fe6f 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -249,6 +249,8 @@ def tearDown(self) -> None: patch_list = [ jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-1'}]), jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-2'}]), + # Reproduce #612 + jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/replicas', 'value': 2}]), # Reproduce #587 jsonpatch.JsonPatch([ {'op': 'replace', 'path': '/spec/workerGroupSpecs/0/replicas', 'value': 2}, From 550db373fd8a58ddf99c4b3f7851ea95e0197e7b Mon Sep 17 00:00:00 2001 From: kaihsun Date: Tue, 11 Oct 2022 22:15:03 +0000 Subject: [PATCH 12/15] update comment --- tests/framework/prototype.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 2452456fe6f..73c93d5c279 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -67,11 +67,11 @@ def check_cluster_exist(): return os.system("kubectl cluster-info --context kind-kind") == 0 ''' -Configuration Test Framework Abstractions: (1) DeltaSet (2) Mutator (3) Rule (4) RuleSet (5) CREvent +Configuration Test Framework Abstractions: (1) Mutator (2) Rule (3) RuleSet (4) CREvent ''' -# Mutator: Mutator will start to mutate from `baseCR`. `deltaSets` is a list of DeltaSets, and each DeltaSet -# specifies a field that wants to mutate with multiple candidate values. +# Mutator: Mutator will start to mutate from `baseCR`. `patch_list` is a list of JsonPatch, and you can +# specify multiple fields that want to mutate in a single JsonPatch. class Mutator: def __init__(self, baseCR, patch_list: list[jsonpatch.JsonPatch]): self.baseCR = baseCR From 4406a4dd80c38aaf86c52a4887bf55f67ab53fd6 Mon Sep 17 00:00:00 2001 From: kaihsun Date: Wed, 12 Oct 2022 19:05:50 +0000 Subject: [PATCH 13/15] add urls --- tests/framework/prototype.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 73c93d5c279..029387d0942 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -249,18 +249,20 @@ def tearDown(self) -> None: patch_list = [ jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-1'}]), jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-2'}]), - # Reproduce #612 + # Reproduce #612: https://github.com/ray-project/kuberay/issues/612 jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/replicas', 'value': 2}]), - # Reproduce #587 + # Reproduce #587: https://github.com/ray-project/kuberay/pull/587 jsonpatch.JsonPatch([ {'op': 'replace', 'path': '/spec/workerGroupSpecs/0/replicas', 'value': 2}, {'op': 'add', 'path': '/spec/workerGroupSpecs/0/template/metadata/name', 'value': 'haha'} ]), - # Reproduce #585 + # Reproduce #585: https://github.com/ray-project/kuberay/pull/585 jsonpatch.JsonPatch([{'op': 'add', 'path': '/spec/headGroupSpec/rayStartParams/object-manager-port', 'value': '12345'}]), - # Reproduce #572 #530 + # Reproduce + # #572: https://github.com/ray-project/kuberay/pull/572 + # #530: https://github.com/ray-project/kuberay/pull/530 jsonpatch.JsonPatch([{'op': 'add', 'path': '/spec/headGroupSpec/template/metadata/labels/app.kubernetes.io~1name', 'value': 'ray'}]), - # Reproduce #529 + # Reproduce #529: https://github.com/ray-project/kuberay/pull/529 jsonpatch.JsonPatch([ {'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/resources/requests/memory', 'value': '256Mi'}, {'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/resources/limits/memory', 'value': '512Mi'} From c8fb915b4116ca56dd06fdb0de42ab6a6be15b91 Mon Sep 17 00:00:00 2001 From: kaihsun Date: Fri, 21 Oct 2022 21:37:46 +0000 Subject: [PATCH 14/15] remove #612 --- tests/framework/prototype.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 029387d0942..e853b68e05e 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -249,8 +249,6 @@ def tearDown(self) -> None: patch_list = [ jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-1'}]), jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/template/spec/containers/0/name', 'value': 'ray-head-2'}]), - # Reproduce #612: https://github.com/ray-project/kuberay/issues/612 - jsonpatch.JsonPatch([{'op': 'replace', 'path': '/spec/headGroupSpec/replicas', 'value': 2}]), # Reproduce #587: https://github.com/ray-project/kuberay/pull/587 jsonpatch.JsonPatch([ {'op': 'replace', 'path': '/spec/workerGroupSpecs/0/replicas', 'value': 2}, From 366ecd66af9bfb9eb9fcaffb57bd40d2c2bdff1b Mon Sep 17 00:00:00 2001 From: kaihsun Date: Thu, 27 Oct 2022 01:26:51 +0000 Subject: [PATCH 15/15] add docstring --- tests/framework/prototype.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index e853b68e05e..c88f9c99213 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -1,5 +1,5 @@ import yaml -from typing import Optional +from typing import List from kubernetes import client, config, utils import os import time @@ -13,6 +13,12 @@ logging.basicConfig(level=logging.INFO) def search_path(cr, steps): + """Search the position in CR based on steps. The following example uses search_path + to get the name of the first container in the head pod. + + [Example] + name = search_path(cr, "spec.headGroupSpec.template.spec.containers.0.name".split('.')) + """ curr = cr for step in steps: if step.isnumeric(): @@ -73,7 +79,7 @@ def check_cluster_exist(): # Mutator: Mutator will start to mutate from `baseCR`. `patch_list` is a list of JsonPatch, and you can # specify multiple fields that want to mutate in a single JsonPatch. class Mutator: - def __init__(self, baseCR, patch_list: list[jsonpatch.JsonPatch]): + def __init__(self, baseCR, patch_list: List[jsonpatch.JsonPatch]): self.baseCR = baseCR self.patch_list = patch_list # Generate a new cr by applying the json patch to `cr`. @@ -96,7 +102,7 @@ def assertRule(self, cr=None, cr_namespace='default'): # RuleSet: A set of Rule class RuleSet: - def __init__(self, rules: list[Rule]): + def __init__(self, rules: List[Rule]): self.rules = rules def checkRuleSet(self, cr, namespace): for rule in self.rules: @@ -112,7 +118,7 @@ def checkRuleSet(self, cr, namespace): # [Step2] wait(): Wait for the system to converge. # [Step3] checkRuleSets(): When the system converges, check all registered RuleSets. class CREvent: - def __init__(self, cr, ruleSets: list[RuleSet], timeout, namespace): + def __init__(self, cr, ruleSets: List[RuleSet], timeout, namespace): self.ruleSets = ruleSets self.timeout = timeout self.namespace = namespace @@ -164,8 +170,7 @@ def assertRule(self, cr=None, namespace='default'): class RayClusterAddCREvent(CREvent): def exec(self): client.CustomObjectsApi().create_namespaced_custom_object( - group = 'ray.io',version = 'v1alpha1', namespace = self.namespace, plural = 'rayclusters', body = self.cr, - pretty = 'true') + group = 'ray.io',version = 'v1alpha1', namespace = self.namespace, plural = 'rayclusters', body = self.cr) def wait(self): def check_pod_running(pods) -> bool: