In [406]:
from circleci.api import Api
import pprint
from collections import defaultdict

# Read the CircleCI API token from the first line of .env, which is expected
# to look like NAME=<token>. Split only on the first "=" so a token that itself
# contains "=" is kept intact, and close the file when done.
with open(".env") as env_file:
    token = env_file.readline().split("=", 1)[1].strip()
circleci = Api(token)

# Data flow used by the rest of the notebook:
#   build summary --> build_num --> get_build_info() --> steps --> output_url
#   --> fetch it --> x[0]["message"] is a newline-delimited log string.
# Each build summary also carries an "outcome" ("failed", "success", ...).

# results[job_name][outcome] -> number of builds of that job with that outcome
results = defaultdict(lambda: defaultdict(int))

# Fetch build summaries one page (at most per_page builds) at a time.
get_how_many = 32
per_page = 100
# Ceiling division so a trailing partial page is fetched too
# (e.g. 250 builds -> 3 pages rather than 2).
pages = max(1, -(-get_how_many // per_page))
builds = []

for i in range(pages):
    offset = i * per_page
    # Ask only for what is still missing so the last page does not overshoot.
    limit = min(per_page, get_how_many - offset)
    for build in circleci.get_project_build_summary(
            "pachyderm", "pachyderm", limit=limit, offset=offset,
        ):
        # Every build is kept for the per-test log analysis further down,
        # regardless of its outcome or job name.
        builds.append(build)
        outcome = build["outcome"]
        build_num = build["build_num"]

        # Only finished builds count towards the suite pass rate.
        if outcome not in ("failed", "success"):
            continue

        # Builds that are not part of a workflow have no job name; skip them
        # instead of raising KeyError.
        job = (build.get("workflows") or {}).get("job_name") or ""
        if job.startswith("test-"):
            results[job][outcome] += 1

# Finding flakiest test suites

The following produces a table of test suites ordered by pass rate, with the flakiest suite (lowest pass rate) first.

In [407]:
import pandas as pd
# Build a table of outcome counts per test suite from results[job][outcome].
df = pd.DataFrame.from_dict(results)

# Transpose so that each row is a test suite and each column an outcome
df = df.T

# NaN -> 0 (the suite never had that outcome)
df = df.fillna(0)

# If no build at all failed (or none succeeded) the column is missing
# entirely; add it as zeros so the pass-rate computation does not KeyError.
for outcome_column in ("failed", "success"):
    if outcome_column not in df.columns:
        df[outcome_column] = 0

df['pass_rate'] = df['success'] / (df['failed'] + df['success'])

# sort by pass rate, lowest (flakiest) first
df = df.sort_values(by=["pass_rate"])

df

Unnamed: 0,failed,success,pass_rate
test-AUTH2,3.0,0.0,0.0
test-MISC,3.0,0.0,0.0
test-EXAMPLES,2.0,0.0,0.0
test-PPS3,2.0,1.0,0.333333
test-PPS1,2.0,1.0,0.333333
test-ADMIN,1.0,1.0,0.5
test-PPS4,0.0,1.0,1.0
test-PPS5,0.0,1.0,1.0
test-PPS6,0.0,1.0,1.0


# Finding flakiest individual tests
Now we fetch the test log for each build and parse it to find the flakiest individual tests.

In [408]:
import requests
# output_url -> JSON payload already downloaded from that URL (filled by parse_build)
cache = dict()
# build_num -> result of circleci.get_build_info() for that build
build_info_cache = dict()

In [409]:
# test name -> (job name, HTML link to the first parsed build that ran the test)
build_map = dict()

In [410]:
# Substrings that mark a line containing "RUN" as shell/docker noise rather
# than a go-test "=== RUN <TestName>" line.
_RUN_NOISE = ("AcceptEnv", "cd pachyderm", "git clone", "NING", "_BAD_TESTS")


def _started_test_name(line):
    """Return the test name announced by a "RUN" line, or None if the line is noise."""
    parts = line.split("RUN")
    if len(parts) != 2:
        return None
    preamble, test = parts
    test = test.strip()
    # ignore docker RUN lines which contain e.g. "#10" or "Step 3/9"
    if "#" in preamble or "Step" in preamble:
        return None
    # an empty name is not a test; very long "names" are base64 gunk
    if test == "" or len(test) > 100:
        return None
    if any(noise in test for noise in _RUN_NOISE):
        return None
    return test


def _result_test_name(line, marker):
    """Return the test name that follows ``marker`` (FAIL/PASS/SKIP), or None if noise."""
    name = line.split(marker)[1].replace(":", "").strip().split("(")[0].strip()
    # "" happens for a bare summary line such as "PASS" on its own;
    # very long values are base64 gunk; a literal "\n" means the marker was
    # found inside an escaped log message rather than a real result line.
    if name == "" or len(name) > 100 or "\\n" in name:
        return None
    return name


def parse_build(build):
    """Classify the tests found in one build's log output.

    The log is taken from ``build["steps"][5]["actions"][0]["output_url"]``.
    Downloaded payloads are memoised in the module-level ``cache`` dict, and
    every test seen for the first time is recorded in the module-level
    ``build_map`` as ``(job_name, html_link_to_this_build)``.

    Args:
        build: build-info dict as returned by ``circleci.get_build_info``.

    Returns:
        A ``(passed, failed, hung)`` tuple of sets of test names. ``hung``
        holds tests that were started (a RUN line) but never reported as
        passed, failed or skipped. Three empty sets are returned when the
        build has no usable log output.

    Raises:
        AssertionError: if a PASS/FAIL/SKIP result names a test for which no
            RUN line was seen.
    """
    try:
        output_url = build["steps"][5]["actions"][0]["output_url"]
    except Exception as e:
        print(f"Got error: {e}, continuing...")
        return set(), set(), set()

    if output_url in cache:
        lines = cache[output_url]
    else:
        # A timeout keeps one stalled download from hanging the whole notebook.
        lines = requests.get(output_url, timeout=30).json()
        cache[output_url] = lines

    # An empty or unexpectedly shaped payload means there is no log to parse;
    # treat it like a build without output instead of crashing.
    try:
        message = lines[0]["message"]
    except (IndexError, KeyError, TypeError) as e:
        print(f"Got error: {e}, continuing...")
        return set(), set(), set()

    all_tests = set()
    failed = set()
    passed = set()
    skipped = set()

    # The checks below are deliberately not mutually exclusive: a line that
    # yields a value for one marker is still examined for the following ones,
    # while a line rejected as noise is dropped entirely.
    for line in message.split("\n"):
        if "RUN" in line:
            started = _started_test_name(line)
            if started is None:
                continue
            all_tests.add(started)
        if "FAIL" in line:
            name = _result_test_name(line, "FAIL")
            # "FAIL <package path>" summary lines are not individual tests
            if name is None or "github.com" in name:
                continue
            failed.add(name)
        if "PASS" in line:
            name = _result_test_name(line, "PASS")
            if name is None:
                continue
            passed.add(name)
        if "SKIP" in line:
            name = _result_test_name(line, "SKIP")
            if name is None:
                continue
            skipped.add(name)

    for test in all_tests:
        if test not in build_map:
            build_map[test] = build["workflows"]["job_name"], \
            f"<a target='_blank' href='{build['build_url']}'>{build['build_num']}</a>"
    hung = all_tests - failed - passed - skipped
    # Holds exactly when every reported result belongs to a started test.
    assert all_tests == (failed | passed | hung | skipped), \
        f"all={all_tests}, failed={failed}, passed={passed}, hung={hung}, skipped={skipped}"
    return passed, failed, hung

In [None]:
# build_results[test] accumulates "pass"/"fail"/"hung" counts plus two labels:
#   "bucket"  - job name recorded for the test in build_map
#   "example" - HTML link to a build in which the test failed or hung
build_results = defaultdict(lambda: defaultdict(int))

for build in builds:
    print(".", end="")
    # Detailed build info is memoised so re-running this cell does not
    # hit the API again for builds already fetched.
    if build["build_num"] in build_info_cache:
        build_info = build_info_cache[build["build_num"]]
    else:
        build_info = circleci.get_build_info("pachyderm", "pachyderm", build["build_num"])
        build_info_cache[build["build_num"]] = build_info
    passed, failed, hung = parse_build(build_info)

    for b in passed:
        build_results[b]["pass"] += 1
        build_results[b]["bucket"] = build_map[b][0]

    if failed or hung:
        # Link to THIS build: build_map only remembers the first build that
        # ran a test, which may well be one where the test passed.
        example_link = f"<a target='_blank' href='{build_info['build_url']}'>{build_info['build_num']}</a>"
    for outcome_key, names in (("fail", failed), ("hung", hung)):
        for b in names:
            build_results[b][outcome_key] += 1
            build_results[b]["bucket"] = build_map[b][0]
            # Keep the first failing/hanging build encountered as the example.
            build_results[b].setdefault("example", example_link)

.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: 'output_url', continuing...
.Got error: list index out of range, continuing...
........

In [None]:
import pandas as pd
# Build a per-test table from build_results[test][field].
df = pd.DataFrame.from_dict(build_results)

# Transpose so that each row is a test and each column a field
df = df.T

# NaN -> 0 (the test never had that outcome / has no example link)
df = df.fillna(0)

# A counter column is absent when no test at all had that outcome (e.g. nothing
# hung); add it as zeros so the rate computations below do not KeyError.
for counter_column in ("pass", "fail", "hung"):
    if counter_column not in df.columns:
        df[counter_column] = 0

df['hang_rate'] = df['hung'] / (df['pass'] + df['fail'] + df['hung'])
df['fail_rate'] = df['fail'] / (df['pass'] + df['fail'] + df['hung'])
df['hang_or_fail'] = df['hang_rate'] + df['fail_rate']

hangy = df[df["hang_rate"] > 0]
faily = df[df["fail_rate"] > 0]

# sort each table by its own rate, worst first
hangy = hangy.sort_values(by=["hang_rate"], ascending=False)
# NOTE: this must sort faily itself; sorting hangy here would replace the
# failing-tests table with the hanging-tests one.
faily = faily.sort_values(by=["fail_rate"], ascending=False)

bad = df[df["hang_or_fail"] > 0]
bad = bad.sort_values(by=["hang_or_fail"], ascending=False)

In [None]:
# Show every row of the tables below instead of truncating long output.
# pandas is imported in this notebook as `pd`; the bare name `pandas` is not defined.
pd.set_option('display.max_rows', None)

In [None]:
from IPython.display import HTML
# Render the table as raw HTML. Escaping is turned off so that the anchor
# tags stored in the "example" column are emitted as markup, not as text.
bad_html = bad.to_html(escape=False)
HTML(bad_html)