# Get jobs from the Buildkite API

Ensure the `BUILDKITE_PAT` environment variable is set before running this notebook.
It can be stored in the `.env` file at the top of the repo, which is kept out of
version control.

In [1]:
import os

# Paths used later in the notebook are relative to the working directory,
# so step up one level when the kernel was started inside `notebooks/`.
started_in_notebooks_dir = os.path.basename(os.getcwd()) == "notebooks"

if started_in_notebooks_dir:
    print("Moving to repo root")
    os.chdir("..")

Moving to repo root


In [2]:
import dotenv

# Pick up BUILDKITE_PAT from the repo's `.env` file, if one is present.
# NOTE(review): the cell displays load_dotenv()'s return value; presumably
# False means no `.env` values were loaded, in which case the token must
# already be set in the environment — confirm against the python-dotenv docs.
dotenv.load_dotenv()

False

In [3]:
import os

# Read the Buildkite personal access token; fail fast when it is unset or
# empty so later API calls do not go out unauthenticated.
token = os.environ.get("BUILDKITE_PAT")

if token is None or token == "":
    raise RuntimeError("No Buildkite token found")

In [4]:
import requests

# Root of the Buildkite REST API; endpoint paths are appended to this.
base_url = "https://api.buildkite.com/v2"

# Headers sent with every request: bearer-token authentication.
h = {"Authorization": "Bearer " + token}

In [5]:
def get_jobs(finished_from: str) -> list[dict]:
    """Get all flagship AMIP jobs from builds filtered by ``finished_from``.

    Queries the ``climacoupler-coarse-nightly-amip`` pipeline for builds on
    ``main``, walking every page of results, and keeps the jobs whose name
    contains both "flagship amip" and "16 helem" (case-insensitive).

    Args:
        finished_from: Timestamp string passed to the API as the
            ``finished_from`` build filter.

    Returns:
        One dict per matching job with build metadata (``build_id``,
        ``build_state``, ``build_created_at``, ``build_finished_at``,
        ``commit``, ``branch``) and job metadata (``name``, ``command``,
        ``state``, ``created_at``, ``finished_at``, ``id``). ``raw_log_txt``
        is included only when the job exposes a log URL and the log was
        downloaded successfully.

    Raises:
        requests.HTTPError: If a builds request returns an error status.
    """
    builds_url = (
        base_url
        + "/organizations/clima/pipelines/climacoupler-coarse-nightly-amip/builds"
    )
    timeout_s = 60  # never hang the notebook on a stalled connection
    jobs = []
    page = 1
    while True:
        resp = requests.get(
            builds_url,
            headers=h,
            params={
                "finished_from": finished_from,
                "branch": "main",
                "page": page,
                "per_page": 100,
            },
            timeout=timeout_s,
        )
        # Surface auth/permission/rate-limit errors here instead of failing
        # later while iterating an error payload as if it were a build list.
        resp.raise_for_status()

        for b in resp.json():
            for j in b.get("jobs") or []:
                name = j.get("name") or ""
                name_lc = name.lower()
                if "flagship amip" not in name_lc or "16 helem" not in name_lc:
                    continue
                job = {
                    "build_id": b.get("id"),
                    "build_state": b.get("state"),
                    "build_created_at": b.get("created_at"),
                    "build_finished_at": b.get("finished_at"),
                    "commit": b.get("commit"),
                    "branch": b.get("branch"),
                    "name": name,
                    "command": j.get("command"),
                    "state": j.get("state"),
                    "created_at": j.get("created_at"),
                    "finished_at": j.get("finished_at"),
                    "id": j.get("id"),
                }
                raw_log_url = j.get("raw_log_url")
                if raw_log_url:
                    log_resp = requests.get(raw_log_url, headers=h, timeout=timeout_s)
                    if log_resp.ok:
                        job["raw_log_txt"] = log_resp.text
                    else:
                        # Best effort: keep the job metadata, but do not store
                        # an HTTP error body as if it were the job's log.
                        print(
                            f"Could not fetch log for job {j.get('id')}: "
                            f"HTTP {log_resp.status_code}"
                        )
                jobs.append(job)

        # Pagination is advertised through the Link response header; stop
        # once the API no longer reports a next page.
        if "next" not in resp.links:
            break
        page += 1
    return jobs

In [6]:
from datetime import datetime, timezone
import glob
import json

# Work out which jobs have already been fetched and pick up from there.
bk_dir = "buildkite"
min_finished_from = "2026-01-14"  # First ever successful build

if os.path.basename(os.getcwd()) == "notebooks":
    print("Moving to repo root")
    os.chdir("..")

os.makedirs(bk_dir, exist_ok=True)

# Jobs in the buildkite folder are JSON files named by the Unix timestamp at
# which their build finished, so the largest timestamp on disk marks the last
# build we fetched and is where the next fetch resumes.
# All datetimes are kept timezone-aware in UTC: a naive local time would shift
# the `finished_from` filter by the machine's UTC offset.
latest_build_finished = datetime.fromisoformat(min_finished_from).replace(
    tzinfo=timezone.utc
)
build_files = [fname for fname in os.listdir(bk_dir) if fname.endswith(".json")]
if build_files:
    # Compare numerically rather than by filename so the result does not
    # depend on how the float timestamps happen to be formatted.
    latest_unix_ts = max(float(fname.removesuffix(".json")) for fname in build_files)
    latest_build_finished = datetime.fromtimestamp(latest_unix_ts, tz=timezone.utc)

print(f"Fetching jobs from builds finished at {latest_build_finished}")

jobs = get_jobs(finished_from=latest_build_finished.isoformat())

print(f"Fetched {len(jobs)} jobs")

for job in jobs:
    build_finished_at = job["build_finished_at"]
    if not build_finished_at:
        # Without a finish time there is no filename to store the job under.
        print(f"Skipping job {job['id']}: build has no finish time")
        continue
    # `fromisoformat` only accepts a trailing "Z" from Python 3.11 onwards,
    # so normalise it to an explicit UTC offset first.
    unix_ts = datetime.fromisoformat(
        build_finished_at.replace("Z", "+00:00")
    ).timestamp()
    fpath = os.path.join(bk_dir, str(unix_ts) + ".json")
    with open(fpath, "w", encoding="utf-8") as f:
        json.dump(job, f, indent=2)

Fetching jobs from builds finished at 2026-02-20 12:49:44.341000


Fetched 1 jobs
