kernelci · aliceinwire · Jan 31, 2025 · Jan 30, 2025 · Jan 30, 2025 · Jan 30, 2025
diff --git a/docs/_index.md b/docs/_index.md
@@ -85,23 +85,39 @@ kci-dev --settings /path/to/.kci-dev.toml
 
 #### results
 
-Pull results from the Dashboard. See detailed [documentation](results).
+Pull results from the Web Dashboard. See detailed [documentation](results).
 
 ### Maestro Commands
 
-#### checkout
+#### config
+
+Set up the base config for talking to the Maestro API. See the Configuration section above.
 
 - [config](config)
 
 #### checkout
 
+Trigger ad-hoc test of specific tree/branch/commit.
+
 - [checkout](checkout)
 
 #### testretry
 
+Trigger a test retry for a given Maestro node id.
+
 - [testretry](testretry)
 
+
+#### watch
+
+Watch for the results of a given node id.
+
+- [watch](watch)
+
+
 #### maestro-results
 
+Pull Maestro results in JSON format.
+
 - [maestro-results](maestro-results)
 
diff --git a/docs/checkout.md b/docs/checkout.md
@@ -111,7 +111,7 @@ Additionally, you can use --watch option to watch the progress of the test.
 
 After executing the command, you will see the output similar to the following:
 ```sh
-./kci-dev.py checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --watch
+kci-dev checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --watch
 api connect: https://staging.kernelci.org:9100/
 Retrieving latest commit on tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git branch: master
 Commit to checkout: d3d1556696c1a993eec54ac585fe5bf677e07474
@@ -171,7 +171,7 @@ Together with --watch option, you can use --test option to wait for particular t
 
 For example:
 ```sh
-kci-dev.py checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --platform-filter sc7180-trogdor-kingoftown --watch --test crit
+kci-dev checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --platform-filter sc7180-trogdor-kingoftown --watch --test crit
 ```
 
 This command will wait for the test results of the test with the name `crit`.  

diff --git a/docs/watch.md b/docs/watch.md
@@ -0,0 +1,42 @@
++++
+title = 'watch'
+date = 2025-01-30T07:07:07+01:00
+description = 'Watch for the results of given node'
++++
+
+This command waits for the results of a particular node id.
+
+Example:
+```sh
+kci-dev watch --nodeid 679a91b565fae3351e2fac77 --job-filter "kbuild-gcc-12-x86-chromeos-amd"
+```
+
+`--job-filter` and `--test` work in the same manner as in the [checkout](../checkout.md) command.
+
+## --nodeid
+
+The Maestro node id to watch for.
+
+## --job-filter
+
+Pass one or more job filters:
+
+```sh
+kci-dev watch --nodeid 679a91b565fae3351e2fac77 --job-filter "kbuild-gcc-12-x86-chromeos-amd" --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm
+```
+
+## --test
+
+Return code of kci-dev will depend on the test result for the supplied test name:
+
+- `pass` - return code 0 (test passed)
+- `fail` - return code 1 (test failed)
+- `error` - return code 2 (prior steps failed, such as compilation, test setup, etc, or infrastructure error)
+- `critical error` - return code 64 (kci-dev failed to execute command, crashed, etc)
+
+For example:
+```sh
+kci-dev watch --nodeid 679a91b565fae3351e2fac77 --job-filter baseline-nfs-arm64-qualcomm  --test crit
+```
+
+This command can be used for regression bisection, where you can check whether the test `crit` passes or fails on a specific commit.
diff --git a/kcidev/libs/maestro_common.py b/kcidev/libs/maestro_common.py
@@ -2,8 +2,10 @@
 # -*- coding: utf-8 -*-
 
 import json
+import time
 
 import click
+import requests
 
 from kcidev.libs.common import *
 
@@ -23,3 +25,189 @@ def maestro_api_error(response):
     except Exception as e:
         kci_err(f"API response error: {e}: {response.text}")
     return
+
+
+def maestro_print_nodes(nodes, field):
+    """
+    Print Maestro node(s) as one pretty-printed JSON list.
+
+    nodes: a single node dict or a list of node dicts.
+    field: optional iterable of key names; when given, only these keys are
+           kept for each node (keys missing from a node are emitted as null).
+    """
+    if not isinstance(nodes, list):
+        nodes = [nodes]
+    if field:
+        res = [{f: node.get(f) for f in field} for node in nodes]
+    else:
+        res = list(nodes)
+    # Emit the collected list exactly once. Printing per node would dump the
+    # growing list again and again and produce a stream of partial documents.
+    kci_msg(json.dumps(res, sort_keys=True, indent=4))
+
+
+def maestro_get_node(url, nodeid):
+    """
+    Fetch one node from the Maestro API and return the decoded JSON body.
+
+    url: API base URL; "latest/node/<nodeid>" is appended to it.
+    nodeid: id of the node to fetch.
+
+    Raises click.Abort after reporting the problem via kci_err when the
+    request fails or the API answers with an HTTP error status.
+    """
+    headers = {
+        "Content-Type": "application/json; charset=utf-8",
+    }
+    url = url + "latest/node/" + nodeid
+    maestro_print_api_call(url)
+    try:
+        # The request itself is inside the try block so connection errors
+        # and timeouts are reported like HTTP errors instead of escaping.
+        response = requests.get(url, headers=headers, timeout=30)
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as ex:
+        kci_err(ex.response.json().get("detail"))
+        # click.Abort is an exception: it has to be raised, merely
+        # instantiating it does nothing.
+        raise click.Abort() from ex
+    except Exception as ex:
+        kci_err(ex)
+        raise click.Abort() from ex
+
+    return response.json()
+
+
+def maestro_get_nodes(url, limit, offset, filter):
+    """
+    Fetch a page of nodes from the Maestro API and return the decoded JSON.
+
+    url: API base URL; "latest/nodes/fast" and the query string are appended.
+    limit, offset: paging values, converted with str() into the query string.
+    filter: optional iterable of ready-made "key=value" query fragments, each
+            appended to the URL with "&".
+
+    Raises click.Abort after reporting the problem via kci_err when the
+    request fails or the API answers with an HTTP error status.
+    """
+    headers = {
+        "Content-Type": "application/json; charset=utf-8",
+    }
+    url = url + "latest/nodes/fast?limit=" + str(limit) + "&offset=" + str(offset)
+    if filter:
+        for f in filter:
+            # TBD: We need to add translate filter to API
+            # if we need anything more complex than eq(=)
+            url = url + "&" + f
+    # Log the URL only once it is complete, so the filters show up as well.
+    maestro_print_api_call(url)
+
+    try:
+        # The request itself is inside the try block so connection errors
+        # and timeouts are reported like HTTP errors instead of escaping.
+        response = requests.get(url, headers=headers, timeout=30)
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as ex:
+        kci_err(ex.response.json().get("detail"))
+        # click.Abort is an exception: it has to be raised, merely
+        # instantiating it does nothing.
+        raise click.Abort() from ex
+    except Exception as ex:
+        kci_err(ex)
+        raise click.Abort() from ex
+
+    return response.json()
+
+
+def maestro_check_node(node):
+    """
+    Collapse a node's state/result pair into RUNNING, DONE or FAIL.
+
+    A node in state "running" is RUNNING. A "checkout" node is additionally
+    DONE while "available" or "closing". Any node that is "done" with result
+    "pass" is DONE. Everything else is reported as FAIL.
+    """
+    name = node["name"]
+    state = node["state"]
+    result = node["result"]
+
+    if state == "running":
+        return "RUNNING"
+    # Only the checkout node counts as finished before reaching "done".
+    if name == "checkout" and state in ("available", "closing"):
+        return "DONE"
+    if state == "done" and result == "pass":
+        return "DONE"
+    return "FAIL"
+
+
+def maestro_retrieve_treeid_nodes(baseurl, token, treeid):
+    """
+    Query the Maestro API for all nodes that belong to the given tree id.
+
+    Returns the decoded JSON response, or None when the request raised or
+    the API answered with a status code of 400 or above (the failure is
+    reported before returning).
+    """
+    request_headers = {
+        "Content-Type": "application/json; charset=utf-8",
+        "Authorization": f"{token}",
+    }
+    endpoint = baseurl + "latest/nodes/fast?treeid=" + treeid
+    try:
+        reply = requests.get(endpoint, headers=request_headers, timeout=30)
+    except Exception as e:
+        # Covers requests' own exceptions as well as anything unexpected;
+        # both are reported the same way.
+        click.secho(f"API connection error: {e}, retrying...", fg="yellow")
+        return None
+
+    if reply.status_code >= 400:
+        maestro_api_error(reply)
+        return None
+
+    return reply.json()
+
+
+def maestro_watch_jobs(baseurl, token, treeid, job_filter, test):
+    """
+    Poll the Maestro API until the watched jobs of a tree have finished.
+
+    baseurl, token: API base URL and authorization token, handed to
+        maestro_retrieve_treeid_nodes().
+    treeid: id of the tree whose nodes are polled.
+    job_filter: iterable of job names to watch; "checkout" is always added.
+    test: optional node name whose result decides the process exit code.
+
+    Returns None once all watched jobs are finished and no test was given.
+    With a test name the process exits instead: 0 when the test result is
+    "pass", 1 for any other test result, 2 when a watched job other than
+    the test itself failed.
+    """
+    # we need to add to job_filter "checkout" node
+    job_filter = list(job_filter)
+    job_filter.append("checkout")
+    previous_nodes = None
+    # Time at which all watched jobs were first seen finished. It is kept
+    # outside the polling loop so the 60s grace period below can expire.
+    jobs_done_ts = None
+    while True:
+        inprogress = 0
+        joblist = job_filter.copy()
+        nodes = maestro_retrieve_treeid_nodes(baseurl, token, treeid)
+        if not nodes:
+            click.secho("No nodes found. Retrying...", fg="yellow")
+            time.sleep(5)
+            continue
+        if previous_nodes == nodes:
+            # nothing changed since the last full pass, just show progress
+            kci_msg_nonl(".")
+            time.sleep(30)
+            continue
+
+        time_local = time.localtime()
+        click.echo(f"\nCurrent time: {time.strftime('%Y-%m-%d %H:%M:%S', time_local)}")
+        click.secho(
+            f"Total tree nodes {len(nodes)} found. job_filter: {job_filter}", fg="green"
+        )
+
+        # Tricky part in watch is that we might have one item in job_filter (job, test),
+        # but it might spawn multiple nodes with same name
+        test_result = None
+        for node in nodes:
+            if node["name"] == test:
+                test_result = node["result"]
+            if node["name"] in job_filter:
+                result = maestro_check_node(node)
+                if result == "DONE":
+                    if node["name"] in joblist:
+                        joblist.remove(node["name"])
+                    color = "green"
+                elif result == "RUNNING":
+                    inprogress += 1
+                    color = "yellow"
+                else:
+                    if node["name"] in joblist:
+                        joblist.remove(node["name"])
+                    color = "red"
+                    # if test is same as job, dont indicate infra-failure if test job fail
+                    if test and test != node["name"]:
+                        # if we have a test, and prior job failed, we should indicate that
+                        kci_err(f"Job {node['name']} failed, test can't be executed")
+                        sys.exit(2)
+                nodeid = node.get("id")
+                click.secho(
+                    f"Node: {nodeid} job: {node['name']} State: {node['state']} Result: {node['result']}",
+                    fg=color,
+                )
+        if not joblist and inprogress == 0:
+            click.secho("All jobs completed", fg="green")
+            if not test:
+                return
+            else:
+                if not jobs_done_ts:
+                    jobs_done_ts = time.time()
+                # if all jobs done, usually test results must be available
+                # max within 60s. Safeguard in case of test node is not available
+                if not test_result and time.time() - jobs_done_ts < 60:
+                    # pause before polling again instead of hammering the API
+                    time.sleep(5)
+                    continue
+
+                if test_result and test_result == "pass":
+                    click.secho(f"Test {test} passed", fg="green")
+                    sys.exit(0)
+                elif test_result:
+                    # ignore null, that means result not ready yet
+                    kci_err(f"Test {test} failed: {test_result}")
+                    sys.exit(1)
+                # NOTE(review): when the grace period expires without a test
+                # result, polling simply continues; confirm whether this
+                # should terminate with an error code instead.
+
+        kci_msg_nonl("\rRefresh every 30s...")
+        previous_nodes = nodes
+        time.sleep(30)
diff --git a/kcidev/main.py b/kcidev/main.py
@@ -13,6 +13,7 @@
     patch,
     results,
     testretry,
+    watch,
 )
 
 
@@ -56,6 +57,7 @@ def run():
     cli.add_command(maestro_results.maestro_results)
     cli.add_command(testretry.testretry)
     cli.add_command(results.results)
+    cli.add_command(watch.watch)
     cli()