diff --git a/docs/_index.md b/docs/_index.md index 8d20353..440008d 100644 --- a/docs/_index.md +++ b/docs/_index.md @@ -85,23 +85,39 @@ kci-dev --settings /path/to/.kci-dev.toml #### results -Pull results from the Dashboard. See detailed [documentation](results). +Pull results from the Web Dashboard. See detailed [documentation](results). ### Maestro Commands -#### checkout +#### config + +Set up the base config for talking to the Maestro API. See the Configuration section above. - [config](config) #### checkout +Trigger an ad-hoc test of a specific tree/branch/commit. + - [checkout](checkout) #### testretry +Trigger a test retry for a given Maestro node id. + - [testretry](testretry) + +#### watch + +Watch for the results of a given node id. + +- [watch](watch) + + #### maestro-results +Pull Maestro results in JSON format. + - [maestro-results](maestro-results) diff --git a/docs/checkout.md b/docs/checkout.md index 0f9356f..ec974df 100644 --- a/docs/checkout.md +++ b/docs/checkout.md @@ -111,7 +111,7 @@ Additionally, you can use --watch option to watch the progress of the test. 
After executing the command, you will see the output similar to the following: ```sh -./kci-dev.py checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --watch +kci-dev checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --watch api connect: https://staging.kernelci.org:9100/ Retrieving latest commit on tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git branch: master Commit to checkout: d3d1556696c1a993eec54ac585fe5bf677e07474 @@ -171,7 +171,7 @@ Together with --watch option, you can use --test option to wait for particular t For example: ```sh -kci-dev.py checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --platform-filter sc7180-trogdor-kingoftown --watch --test crit +kci-dev checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --platform-filter sc7180-trogdor-kingoftown --watch --test crit ``` This command will wait for the test results of the test with the name `crit`. diff --git a/docs/watch.md b/docs/watch.md new file mode 100644 index 0000000..11f4855 --- /dev/null +++ b/docs/watch.md @@ -0,0 +1,42 @@ ++++ +title = 'watch' +date = 2025-01-30T07:07:07+01:00 +description = 'Watch for the results of a given node' ++++ + +This command waits for the results of a particular node id. 
+ +Example: +```sh +kci-dev watch --nodeid 679a91b565fae3351e2fac77 --job-filter "kbuild-gcc-12-x86-chromeos-amd" +``` + +`--job-filter` and `--test` work in the same manner as in the [checkout](../checkout.md) command. + +## --nodeid + +The Maestro node id to watch for. + +## --job-filter + +Pass one or more job filters: + +```sh +kci-dev watch --nodeid 679a91b565fae3351e2fac77 --job-filter "kbuild-gcc-12-x86-chromeos-amd" --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm +``` + +### --test + +Return code of kci-dev will depend on the test result for the supplied test name: + +- `pass` - return code 0 (test passed) +- `fail` - return code 1 (test failed) +- `error` - return code 2 (prior steps failed, such as compilation, test setup, etc, or infrastructure error) +- `critical error` - return code 64 (kci-dev failed to execute command, crashed, etc) + +For example: +```sh +kci-dev watch --nodeid 679a91b565fae3351e2fac77 --job-filter baseline-nfs-arm64-qualcomm --test crit +``` + +This command can be used for regression bisection, where you can check whether the test `crit` passes or fails on a specific commit. 
diff --git a/kcidev/libs/maestro_common.py b/kcidev/libs/maestro_common.py
index fdbd977..b890f6e 100644
--- a/kcidev/libs/maestro_common.py
+++ b/kcidev/libs/maestro_common.py
@@ -2,8 +2,11 @@
 # -*- coding: utf-8 -*-
 
 import json
+import sys
+import time
 
 import click
+import requests
 
 from kcidev.libs.common import *
 
@@ -23,3 +26,207 @@ def maestro_api_error(response):
     except Exception as e:
         kci_err(f"API response error: {e}: {response.text}")
     return
+
+
+def maestro_print_nodes(nodes, field):
+    """Print node(s) as JSON, optionally restricted to the keys in ``field``."""
+    res = []
+    if not isinstance(nodes, list):
+        nodes = [nodes]
+    for node in nodes:
+        if field:
+            data = {}
+            for f in field:
+                data[f] = node.get(f)
+            res.append(data)
+        else:
+            res.append(node)
+    kci_msg(json.dumps(res, sort_keys=True, indent=4))
+
+
+def maestro_get_node(url, nodeid):
+    """Fetch a single node by id; abort on API errors."""
+    headers = {
+        "Content-Type": "application/json; charset=utf-8",
+    }
+    url = url + "latest/node/" + nodeid
+    maestro_print_api_call(url)
+    try:
+        # same timeout as maestro_retrieve_treeid_nodes: never hang on a stalled API
+        response = requests.get(url, headers=headers, timeout=30)
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as ex:
+        kci_err(ex.response.json().get("detail"))
+        # click.Abort is an exception: it must be raised, not just instantiated
+        raise click.Abort()
+    except Exception as ex:
+        kci_err(ex)
+        raise click.Abort()
+
+    return response.json()
+
+
+def maestro_get_nodes(url, limit, offset, filter):
+    """Fetch a page of nodes matching ``filter``; abort on API errors."""
+    headers = {
+        "Content-Type": "application/json; charset=utf-8",
+    }
+    url = url + "latest/nodes/fast?limit=" + str(limit) + "&offset=" + str(offset)
+    if filter:
+        for f in filter:
+            # TBD: We need to add translate filter to API
+            # if we need anything more complex than eq(=)
+            url = url + "&" + f
+    # log the final URL, including the filters appended above
+    maestro_print_api_call(url)
+
+    try:
+        response = requests.get(url, headers=headers, timeout=30)
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as ex:
+        kci_err(ex.response.json().get("detail"))
+        raise click.Abort()
+    except Exception as ex:
+        kci_err(ex)
+        raise click.Abort()
+
+    return response.json()
+
+
+def maestro_check_node(node):
+    """
+    Node can be defined RUNNING/DONE/FAIL based on the state
+    Simplify, as our current state model is suboptimal
+    """
+    name = node["name"]
+    state = node["state"]
+    result = node["result"]
+    if name == "checkout":
+        if state == "running":
+            return "RUNNING"
+        elif state == "available" or state == "closing":
+            return "DONE"
+        elif state == "done" and result == "pass":
+            return "DONE"
+        else:
+            return "FAIL"
+    else:
+        if state == "running":
+            return "RUNNING"
+        elif state == "done" and result == "pass":
+            return "DONE"
+        else:
+            return "FAIL"
+
+
+def maestro_retrieve_treeid_nodes(baseurl, token, treeid):
+    """Return the nodes of a tree, or None when the API call fails."""
+    url = baseurl + "latest/nodes/fast?treeid=" + treeid
+    headers = {
+        "Content-Type": "application/json; charset=utf-8",
+        "Authorization": f"{token}",
+    }
+    try:
+        response = requests.get(url, headers=headers, timeout=30)
+    except requests.exceptions.RequestException as e:
+        click.secho(f"API connection error: {e}, retrying...", fg="yellow")
+        return None
+    except Exception as e:
+        click.secho(f"API connection error: {e}, retrying...", fg="yellow")
+        return None
+
+    if response.status_code >= 400:
+        maestro_api_error(response)
+        return None
+
+    return response.json()
+
+
+def maestro_watch_jobs(baseurl, token, treeid, job_filter, test):
+    """
+    Poll the tree until every job in job_filter (plus "checkout") is finished.
+
+    Returns once all jobs are done when no test name is given. With a test name
+    the process exits instead: 0 if the test passed, 1 if it failed and 2 if a
+    prior job failed so the test could not run.
+    """
+    # we need to add to job_filter "checkout" node
+    job_filter = list(job_filter)
+    job_filter.append("checkout")
+    previous_nodes = None
+    # kept across polls: resetting it on every iteration would restart the 60s
+    # grace period each time, so it could never expire
+    jobs_done_ts = None
+    while True:
+        inprogress = 0
+        joblist = job_filter.copy()
+        nodes = maestro_retrieve_treeid_nodes(baseurl, token, treeid)
+        if not nodes:
+            click.secho("No nodes found. Retrying...", fg="yellow")
+            time.sleep(5)
+            continue
+        if previous_nodes == nodes:
+            kci_msg_nonl(".")
+            time.sleep(30)
+            continue
+
+        time_local = time.localtime()
+        click.echo(f"\nCurrent time: {time.strftime('%Y-%m-%d %H:%M:%S', time_local)}")
+        click.secho(
+            f"Total tree nodes {len(nodes)} found. job_filter: {job_filter}", fg="green"
+        )
+
+        # Tricky part in watch is that we might have one item in job_filter (job, test),
+        # but it might spawn multiple nodes with same name
+        test_result = None
+        for node in nodes:
+            if node["name"] == test:
+                test_result = node["result"]
+            if node["name"] in job_filter:
+                result = maestro_check_node(node)
+                if result == "DONE":
+                    if isinstance(joblist, list) and node["name"] in joblist:
+                        joblist.remove(node["name"])
+                    color = "green"
+                elif result == "RUNNING":
+                    inprogress += 1
+                    color = "yellow"
+                else:
+                    if isinstance(joblist, list) and node["name"] in joblist:
+                        joblist.remove(node["name"])
+                    color = "red"
+                    # if test is same as job, dont indicate infra-failure if test job fail
+                    if test and test != node["name"]:
+                        # if we have a test, and prior job failed, we should indicate that
+                        kci_err(f"Job {node['name']} failed, test can't be executed")
+                        sys.exit(2)
+                nodeid = node.get("id")
+                click.secho(
+                    f"Node: {nodeid} job: {node['name']} State: {node['state']} Result: {node['result']}",
+                    fg=color,
+                )
+        if isinstance(joblist, list) and len(joblist) == 0 and inprogress == 0:
+            click.secho("All jobs completed", fg="green")
+            if not test:
+                return
+            else:
+                if not jobs_done_ts:
+                    jobs_done_ts = time.time()
+                # if all jobs done, usually test results must be available
+                # max within 60s. Safeguard in case of test node is not available
+                if not test_result and time.time() - jobs_done_ts < 60:
+                    # pause before polling again instead of hammering the API
+                    time.sleep(5)
+                    continue
+
+        if test_result and test_result == "pass":
+            click.secho(f"Test {test} passed", fg="green")
+            sys.exit(0)
+        elif test_result:
+            # ignore null, that means result not ready yet
+            kci_err(f"Test {test} failed: {test_result}")
+            sys.exit(1)
+
+        kci_msg_nonl(f"\rRefresh every 30s...")
+        previous_nodes = nodes
+        time.sleep(30)
diff --git a/kcidev/main.py b/kcidev/main.py
index 78868aa..84687ee 100755
--- a/kcidev/main.py
+++ b/kcidev/main.py
@@ -13,6 +13,7 @@
     patch,
     results,
     testretry,
+    watch,
 )
 
 
@@ -56,6 +57,7 @@ def run():
     cli.add_command(maestro_results.maestro_results)
     cli.add_command(testretry.testretry)
     cli.add_command(results.results)
+    cli.add_command(watch.watch)
     cli()
 
 
diff --git a/kcidev/subcommands/checkout.py b/kcidev/subcommands/checkout.py
index 1f87848..5ceb51b 100644
--- a/kcidev/subcommands/checkout.py
+++ b/kcidev/subcommands/checkout.py
@@ -44,133 +44,6 @@ def send_checkout_full(baseurl, token, **kwargs):
     return response.json()
 
 
-def retrieve_treeid_nodes(baseurl, token, treeid):
-    url = baseurl + "latest/nodes/fast?treeid=" + treeid
-    headers = {
-        "Content-Type": "application/json; charset=utf-8",
-        "Authorization": f"{token}",
-    }
-    try:
-        response = requests.get(url, headers=headers, timeout=30)
-    except requests.exceptions.RequestException as e:
-        click.secho(f"API connection error: {e}, retrying...", fg="yellow")
-        return None
-    except Exception as e:
-        click.secho(f"API connection error: {e}, retrying...", fg="yellow")
-        return None
-
-    if response.status_code >= 400:
-        maestro_api_error(response)
-        return None
-
-    return response.json()
-
-
-def check_node(node):
-    """
-    Node can be defined RUNNING/DONE/FAIL based on the state
-    Simplify, as our current state model suboptimal
-    """
-    name = node["name"]
-    state = node["state"]
-    result = node["result"]
-    if name == "checkout":
-        if state == "running":
-            return "RUNNING"
-        elif state == "available" or state == "closing":
-            return "DONE"
-        elif state == "done" and result == "pass":
-            return "DONE"
-        else:
-            return "FAIL"
-    else:
-        if state == "running":
-            return "RUNNING"
-        elif state == "done" and result == "pass":
-            return "DONE"
-        else:
-            return "FAIL"
-
-
-def watch_jobs(baseurl, token, treeid, job_filter, test):
-    # we need to add to job_filter "checkout" node
-    job_filter = list(job_filter)
-    job_filter.append("checkout")
-    previous_nodes = None
-    while True:
-        inprogress = 0
-        joblist = job_filter.copy()
-        nodes = retrieve_treeid_nodes(baseurl, token, treeid)
-        if not nodes:
-            click.secho("No nodes found. Retrying...", fg="yellow")
-            time.sleep(5)
-            continue
-        if previous_nodes == nodes:
-            kci_msg_nonl(".")
-            time.sleep(30)
-            continue
-
-        time_local = time.localtime()
-        click.echo(f"\nCurrent time: {time.strftime('%Y-%m-%d %H:%M:%S', time_local)}")
-        click.secho(
-            f"Total tree nodes {len(nodes)} found. job_filter: {job_filter}", fg="green"
-        )
-
-        # Tricky part in watch is that we might have one item in job_filter (job, test),
-        # but it might spawn multiple nodes with same name
-        test_result = None
-        jobs_done_ts = None
-        for node in nodes:
-            if node["name"] == test:
-                test_result = node["result"]
-            if node["name"] in job_filter:
-                result = check_node(node)
-                if result == "DONE":
-                    if isinstance(joblist, list) and node["name"] in joblist:
-                        joblist.remove(node["name"])
-                    color = "green"
-                elif result == "RUNNING":
-                    inprogress += 1
-                    color = "yellow"
-                else:
-                    if isinstance(joblist, list) and node["name"] in joblist:
-                        joblist.remove(node["name"])
-                    color = "red"
-                    # if test is same as job, dont indicate infra-failure if test job fail
-                    if test and test != node["name"]:
-                        # if we have a test, and prior job failed, we should indicate that
-                        kci_err(f"Job {node['name']} failed, test can't be executed")
-                        sys.exit(2)
-                nodeid = node.get("id")
-                click.secho(
-                    f"Node: {nodeid} job: {node['name']} State: {node['state']} Result: {node['result']}",
-                    fg=color,
-                )
-        if len(joblist) == 0 and inprogress == 0:
-            click.secho("All jobs completed", fg="green")
-            if not test:
-                return
-            else:
-                if not jobs_done_ts:
-                    jobs_done_ts = time.time()
-                # if all jobs done, usually test results must be available
-                # max within 60s. Safeguard in case of test node is not available
-                if not test_result and time.time() - jobs_done_ts < 60:
-                    continue
-
-        if test_result and test_result == "pass":
-            click.secho(f"Test {test} passed", fg="green")
-            sys.exit(0)
-        elif test_result:
-            # ignore null, that means result not ready yet
-            kci_err(f"Test {test} failed: {test_result}")
-            sys.exit(1)
-
-        kci_msg_nonl(f"\rRefresh every 30s...")
-        previous_nodes = nodes
-        time.sleep(30)
-
-
 def retrieve_tot_commit(repourl, branch):
     """
     Retrieve the latest commit on a branch
@@ -284,7 +157,7 @@ def checkout(
         if test:
             click.secho(f"Watching for test result: {test}", fg="green")
         # watch for jobs
-        watch_jobs(apiurl, token, treeid, job_filter, test)
+        maestro_watch_jobs(apiurl, token, treeid, job_filter, test)
 
 
 if __name__ == "__main__":
diff --git a/kcidev/subcommands/maestro_results.py b/kcidev/subcommands/maestro_results.py
index e82551d..4e6b5c9 100644
--- a/kcidev/subcommands/maestro_results.py
+++ b/kcidev/subcommands/maestro_results.py
@@ -1,76 +1,13 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import json
-import pprint
-
 import click
-import requests
 from git import Repo
 
 from kcidev.libs.common import *
 from kcidev.libs.maestro_common import *
 
 
-def print_nodes(nodes, field):
-    res = []
-    if not isinstance(nodes, list):
-        nodes = [nodes]
-    for node in nodes:
-        if field:
-            data = {}
-            for f in field:
-                data[f] = node.get(f)
-            res.append(data)
-        else:
-            res.append(node)
-    kci_msg(json.dumps(res, sort_keys=True, indent=4))
-
-
-def get_node(url, nodeid, field):
-    headers = {
-        "Content-Type": "application/json; charset=utf-8",
-    }
-    url = url + "latest/node/" + nodeid
-    maestro_print_api_call(url)
-    response = requests.get(url, headers=headers)
-    try:
-        response.raise_for_status()
-    except requests.exceptions.HTTPError as ex:
-        kci_err(ex.response.json().get("detail"))
-        return None
-    except Exception as ex:
-        kci_err(ex)
-        return None
-    print_nodes(response.json(), field)
-
-
-def get_nodes(url, limit, offset, filter, field):
-    headers = {
-        "Content-Type": "application/json; charset=utf-8",
-    }
-    url = url + "latest/nodes/fast?limit=" + str(limit) + "&offset=" + str(offset)
-    maestro_print_api_call(url)
-    if filter:
-        for f in filter:
-            # TBD: We need to add translate filter to API
-            # if we need anything more complex than eq(=)
-            url = url + "&" + f
-
-    response = requests.get(url, headers=headers)
-    try:
-        response.raise_for_status()
-    except requests.exceptions.HTTPError as ex:
-        kci_err(ex.response.json().get("detail"))
-        return None
-    except Exception as ex:
-        kci_err(ex)
-        return None
-
-    nodes = response.json()
-    print_nodes(nodes, field)
-
-
 @click.command(help="Get results directly from KernelCI's Maestro")
 @click.option(
     "--nodeid",
@@ -113,9 +50,9 @@ def maestro_results(ctx, nodeid, nodes, limit, offset, filter, field):
     instance = ctx.obj.get("INSTANCE")
     url = config[instance]["api"]
     if nodeid:
-        get_node(url, nodeid, field)
+        maestro_print_nodes(maestro_get_node(url, nodeid), field)
     if nodes:
-        get_nodes(url, limit, offset, filter, field)
+        maestro_print_nodes(maestro_get_nodes(url, limit, offset, filter), field)
 
 
 if __name__ == "__main__":
diff --git a/kcidev/subcommands/watch.py b/kcidev/subcommands/watch.py
new file mode 100644
index 0000000..c63be97
--- /dev/null
+++ b/kcidev/subcommands/watch.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import click
+
+from kcidev.libs.common import *
+from kcidev.libs.maestro_common import *
+
+
+@click.command(help="Watch completion of a test job")
+@click.option(
+    "--nodeid",
+    help="define the node id of the job to watch for",
+    required=True,
+)
+@click.option(
+    "--job-filter",
+    help="Job filter to watch for",
+    multiple=True,
+)
+@click.option(
+    "--test",
+    help="Return code based on the test result",
+)
+@click.pass_context
+def watch(ctx, nodeid, job_filter, test):
+    """Look up the tree of the given node and watch its jobs until they finish."""
+    cfg = ctx.obj.get("CFG")
+    instance = ctx.obj.get("INSTANCE")
+    apiurl = cfg[instance]["api"]
+    token = cfg[instance]["token"]
+
+    node = maestro_get_node(apiurl, nodeid)
+    maestro_watch_jobs(apiurl, token, node["treeid"], job_filter, test)
+
+
+if __name__ == "__main__":
+    main_kcidev()