diff --git a/.github/workflows/recent_fail_summary.yaml b/.github/workflows/recent_fail_summary.yaml
index 08f9a793eb91f8..5c23b5be881e73 100644
--- a/.github/workflows/recent_fail_summary.yaml
+++ b/.github/workflows/recent_fail_summary.yaml
@@ -15,7 +15,7 @@ name: Recent Fail Summary
 
 on:
     schedule:
-        - cron: "0 0 * * *"
+        - cron: "10 0 * * *"
     workflow_dispatch:
 
 concurrency:
@@ -25,24 +25,34 @@ jobs:
     list_workflows:
         name: Summarize Recent Workflow Failures
         runs-on: ubuntu-latest
+        permissions: write-all
         steps:
             - uses: actions/checkout@v4
-            - run: pip install pandas python-slugify
+            - run: pip install pandas python-slugify pyyaml tabulate
             - name: Run Summarization Script
               run: python scripts/tools/summarize_fail.py
               env:
                   GH_TOKEN: ${{ github.token }}
+            - name: Update Docs
+              uses: test-room-7/action-update-file@v1
+              with:
+                  file-path: docs/daily_pass_percentage.md
+                  commit-msg: Update daily pass percentage
+                  github-token: ${{ secrets.GITHUB_TOKEN }}
+                  branch: daily_pass_percentage
             - name: Upload Logs
               uses: actions/upload-artifact@v3
               with:
                   name: workflow-fail-summary
                   path: |
-                      run_list.json
+                      fail_run_list.json
+                      all_run_list.json
                       recent_fails.csv
                       recent_fails_frequency.csv
                       failure_cause_summary.csv
-                      workflow_fail_rate.csv
+                      workflow_pass_rate.csv
+                      workflow_pass_rate.sqlite3
                       recent_fails_logs
-                      workflow_fail_rate
+                      workflow_pass_rate
                   retention-days: 5
 
diff --git a/scripts/tools/build_fail_defs.yaml b/scripts/tools/build_fail_defs.yaml
new file mode 100644
index 00000000000000..8d6e782c2705b9
--- /dev/null
+++ b/scripts/tools/build_fail_defs.yaml
@@ -0,0 +1,20 @@
+CodeQL:
+    No space left on device:
+        short: Ran out of space
+        detail: Exception with signature "No space left on device"
+    Check that the disk containing the database directory has ample free space.:
+        short: Ran out of space
+        detail:
+            Fatal internal error with message indicating that disk space most
+            likely ran out
+Build example:
+    Could not find a version that satisfies the requirement:
+        short: Requirements issue
+        detail: Unable to install a requirement in Python requirements.txt
+    No module named:
+        short: Missing module
+        detail: Expected module was missing
+Full builds:
+    No space left on device:
+        short: Ran out of space
+        detail: Exception with signature "No space left on device"
diff --git a/scripts/tools/summarize_fail.py b/scripts/tools/summarize_fail.py
index 85283784f9c50c..04b3b33077c4b6 100644
--- a/scripts/tools/summarize_fail.py
+++ b/scripts/tools/summarize_fail.py
@@ -1,38 +1,31 @@
+import datetime
 import logging
 import os
+import sqlite3
 import subprocess
 
 import pandas as pd
+import yaml
 from slugify import slugify
 
-error_catalog = {
-    "CodeQL": {
-        "No space left on device": {
-            "short": "Ran out of space",
-            "detail": "Exception with signature \"No space left on device\""
-        },
-        "Check that the disk containing the database directory has ample free space.": {
-            "short": "Ran out of space",
-            "detail": "Fatal internal error with message indicating that disk space most likely ran out"
-        }
-    },
-    "Build example": {
-        "Could not find a version that satisfies the requirement": {
-            "short": "Requirements issue",
-            "detail": "Unable to install a requirements in Python requirements.txt"
-        },
-        "No module named": {
-            "short": "Missing module",
-            "detail": "Expected module was missing"
-        }
-    },
-    "Full builds": {
-        "No space left on device": {
-            "short": "Ran out of space",
-            "detail": "Exception with signature \"No space left on device\""
-        }
-    }
-}
+yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
+
+with open("scripts/tools/build_fail_defs.yaml", "r") as fail_defs:
+    try:
+        error_catalog = yaml.safe_load(fail_defs)
+    except Exception:
+        logging.exception("Could not load fail definition file.")
+
+
+def pass_fail_rate(workflow):
+    logging.info(f"Checking recent pass/fail rate of workflow {workflow}.")
+    workflow_fail_rate_output_path = f"workflow_pass_rate/{slugify(workflow)}"
+    if not os.path.exists(workflow_fail_rate_output_path):
+        os.makedirs(workflow_fail_rate_output_path)
+        subprocess.run(
+            f"gh run list -R project-chip/connectedhomeip -b master -w '{workflow}' -L 500 --created {yesterday} --json conclusion > {workflow_fail_rate_output_path}/run_list.json", shell=True)
+    else:
+        logging.info("This workflow has already been processed.")
 
 
 def process_fail(id, pr, start_time, workflow):
@@ -57,28 +50,20 @@ def process_fail(id, pr, start_time, workflow):
                     root_cause = error_catalog[workflow_category][error_message]["short"]
                     break
 
-    logging.info(f"Checking recent pass/fail rate of workflow {workflow}.")
-    workflow_fail_rate_output_path = f"workflow_pass_rate/{slugify(workflow)}"
-    if not os.path.exists(workflow_fail_rate_output_path):
-        os.makedirs(workflow_fail_rate_output_path)
-        subprocess.run(
-            f"gh run list -R project-chip/connectedhomeip -b master -w '{workflow}' --json conclusion > {workflow_fail_rate_output_path}/run_list.json", shell=True)
-    else:
-        logging.info("This workflow has already been processed.")
-
     return [pr, workflow, root_cause]
 
 
 def main():
-    logging.info("Gathering recent fails information into run_list.json.")
-    subprocess.run("gh run list -R project-chip/connectedhomeip -b master -s failure --json databaseId,displayTitle,startedAt,workflowName > run_list.json", shell=True)
+    logging.info("Gathering recent fails information into fail_run_list.json.")
+    subprocess.run(
+        f"gh run list -R project-chip/connectedhomeip -b master -s failure -L 500 --created {yesterday} --json databaseId,displayTitle,startedAt,workflowName > fail_run_list.json", shell=True)
 
-    logging.info("Reading run_list.json into a DataFrame.")
-    df = pd.read_json("run_list.json")
+    logging.info("Reading fail_run_list.json into a DataFrame.")
+    df = pd.read_json("fail_run_list.json")
 
-    logging.info("Listing recent fails.")
+    logging.info(f"Listing recent fails from {yesterday}.")
     df.columns = ["ID", "Pull Request", "Start Time", "Workflow"]
-    print("Recent Fails:")
+    print(f"Recent Fails From {yesterday}:")
     print(df.to_string(columns=["Pull Request", "Workflow"], index=False))
     print()
    df.to_csv("recent_fails.csv", index=False)
@@ -91,6 +76,17 @@ def main():
     print()
     frequency.to_csv("recent_workflow_fails_frequency.csv")
 
+    logging.info("Gathering recent runs information into all_run_list.json.")
+    subprocess.run(
+        f"gh run list -R project-chip/connectedhomeip -b master -L 5000 --created {yesterday} --json workflowName > all_run_list.json", shell=True)
+
+    logging.info("Reading all_run_list.json into a DataFrame.")
+    all_df = pd.read_json("all_run_list.json")
+
+    logging.info("Gathering pass/fail rate info.")
+    all_df.columns = ["Workflow"]
+    all_df.apply(lambda row: pass_fail_rate(row["Workflow"]), axis=1)
+
     logging.info("Conducting fail information parsing.")
     root_causes = df.apply(lambda row: process_fail(row["ID"], row["Pull Request"], row["Start Time"], row["Workflow"]),
                            axis=1, result_type="expand")
@@ -100,19 +96,26 @@ def main():
     print()
     root_causes.to_csv("failure_cause_summary.csv")
 
-    logging.info("Listing percent fail rate of recent fails by workflow.")
fails by workflow.") - fail_rate = {} + logging.info("Listing percent pass rate of recent fails by workflow.") + pass_rate = {} for workflow in next(os.walk("workflow_pass_rate"))[1]: try: info = pd.read_json(f"workflow_pass_rate/{workflow}/run_list.json") info = info[info["conclusion"].str.len() > 0] - fail_rate[workflow] = [info.value_counts(normalize=True).mul(100).round()["failure"]] except Exception: logging.exception(f"Recent runs info for {workflow} was not collected.") - fail_rate = pd.DataFrame.from_dict(fail_rate, 'index', columns=["Fail Rate"]) - print("Recent Fail Rate of Each Workflow:") - print(fail_rate.to_string()) - fail_rate.to_csv("workflow_fail_rate.csv") + try: + pass_rate[workflow] = [info.value_counts(normalize=True).mul(100).round()["success"]] + except Exception: + pass_rate[workflow] = [0.0] + pass_rate = pd.DataFrame.from_dict(pass_rate, 'index', columns=["Pass Rate"]).sort_values("Pass Rate") + print("Recent Pass Rate of Each Workflow:") + pass_rate.to_markdown("docs/daily_pass_percentage.md") + pass_rate_sql = sqlite3.connect("workflow_pass_rate.sqlite3") + pass_rate.to_sql("workflow_pass_rate", pass_rate_sql, if_exists="replace") + print(pass_rate.to_string()) + pass_rate.to_csv("workflow_pass_rate.csv") if __name__ == "__main__":