Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 55 additions & 33 deletions treeherder/log_parser/intermittents.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,38 @@
import datetime

from treeherder.model.models import Group, GroupStatus, Job, Push
from treeherder.model.models import Group, GroupStatus, Job, Push, TextLogError


def classify(jobs_to_classify, jobs_to_unclassify):
    """Apply or revert the "known intermittent" failure classification.

    :param jobs_to_classify: job ids to mark as known intermittent
        (failure_classification_id=8), but only if currently unclassified (1)
        or "new failure needs classification" (6).
    :param jobs_to_unclassify: job ids previously parked as fc_id=8 whose
        newer data shows real failures; they are restored to 6 when they have
        a ``new_failure`` text-log error, otherwise to 1.
    :returns: None — all effects are bulk UPDATEs against the Job table.
    """
    # TODO: consider job.result=(busted, exception)
    if jobs_to_classify:
        # QuerySet.update() on an empty queryset is a no-op, so no extra
        # truthiness check (which would issue its own SELECT) is needed.
        Job.objects.filter(
            id__in=jobs_to_classify, result="testfailed", failure_classification_id__in=[1, 6]
        ).update(failure_classification_id=8)

    if jobs_to_unclassify:
        # Jobs that logged a new_failure line were originally fc_id=6, so
        # restore them to 6; everything else reverts to 1.
        new_jobs = (
            TextLogError.objects.filter(
                job__id__in=jobs_to_unclassify,
                new_failure=True,
                job__failure_classification_id=8,
            )
            .values("job__id")
            .distinct()
        )
        jobs_to_newfailure = [row["job__id"] for row in new_jobs]
        jobs_to_regular_failure = list(set(jobs_to_unclassify) - set(jobs_to_newfailure))

        # classification_id: 6 == new failure needs classification, 1 == not classified
        if jobs_to_newfailure:
            Job.objects.filter(id__in=jobs_to_newfailure, result="testfailed").update(
                failure_classification_id=6
            )
        if jobs_to_regular_failure:
            Job.objects.filter(id__in=jobs_to_regular_failure, result="testfailed").update(
                failure_classification_id=1
            )


def _check_and_mark_infra(current_job, job_ids, push_ids):
Expand Down Expand Up @@ -42,7 +74,7 @@ def _check_and_mark_infra(current_job, job_ids, push_ids):

# ignore previous classified, we are looking for NEW extra jobs
if len([ej for ej in extra_jobs if ej["failure_classification_id"] != 8]) == 0:
return
return [], []

# ensure 50% 'success' rate
# success here means the task ran and produced groups | is success
Expand All @@ -52,20 +84,24 @@ def _check_and_mark_infra(current_job, job_ids, push_ids):
if job["id"] not in job_ids and job["result"] != "success":
extra_failed.append(job)

jobs_to_classify = []
jobs_to_unclassify = []

# look for failure rate > 50% and exit early
if len(extra_failed) / len(extra_jobs) > 0.5:
# as failure rate > 50%, if any jobs are fc_id=8 classify as fc_id=1
for job in extra_failed:
if job["failure_classification_id"] == 8:
Job.objects.filter(id=job["id"]).update(failure_classification_id=1)
return
jobs_to_unclassify.append(job["id"])

# any extra_jobs will be failures without groups (infra/timeout/etc.)
# theoretically there could be many jobs here
# mark extra_jobs as `intermittent_needs_classification`
for job in extra_failed:
if job["failure_classification_id"] not in [4, 8]:
Job.objects.filter(id=job["id"]).update(failure_classification_id=8)
jobs_to_classify.append(job["id"])

return jobs_to_classify, jobs_to_unclassify


def check_and_mark_intermittent(job_id):
Expand All @@ -86,7 +122,7 @@ def check_and_mark_intermittent(job_id):
# get list of pushes, find the current push and recent pushes
idlist = (
Push.objects.filter(repository__id=current_job.repository.id, time__gte=start_date)
.values("id")
.values_list("id", flat=True)
.order_by("-id")
)
counter = -1
Expand Down Expand Up @@ -135,7 +171,8 @@ def check_and_mark_intermittent(job_id):
# If no groups, look for infra
distinct_job_ids = list(set([f["job_logs__job__id"] for f in all_groups]))
if len(distinct_job_ids) == 1:
return _check_and_mark_infra(current_job, distinct_job_ids, ids)
to_classify, to_unclassify = _check_and_mark_infra(current_job, distinct_job_ids, ids)
return classify(to_classify, to_unclassify)

mappings = {}
job_classifications = {}
Expand All @@ -151,6 +188,7 @@ def check_and_mark_intermittent(job_id):
# we have a variant
continue

# TODO: consider storing a list of job.id that are fc_id=8
# store job:fc_id so we can reference what needs changed
if item["job_logs__job__id"] not in job_classifications:
job_classifications[item["job_logs__job__id"]] = item[
Expand Down Expand Up @@ -181,18 +219,14 @@ def check_and_mark_intermittent(job_id):
current_changed_groups = {}
for group in mappings.get(current_job.push.id, {}).get("groups", []):
all_data = []
current_data = []
current_data = [
mappings[current_job.push.id]["groups"][group][j]
for j in mappings[current_job.push.id]["groups"][group]
]
for id in mappings.keys():
all_data.extend(
[mappings[id]["groups"][group][j] for j in mappings[id]["groups"].get(group, {})]
)
if id == current_job.push.id:
current_data.extend(
[
mappings[id]["groups"][group][j]
for j in mappings[id]["groups"].get(group, {})
]
)

# if new data changes results, update
pass_rate = len([s for s in all_data if s == GroupStatus.OK]) / len(all_data)
Expand All @@ -203,9 +237,9 @@ def check_and_mark_intermittent(job_id):
current_changed_groups[group] = True

# all changed_groups need to be evaluated on previous 'failed' jobs to ensure all groups in that task are 'passing'
jobs_to_classify = [] # mark as fcid=8 (known intermittent)
jobs_to_unclassify = [] # previously parked as fcid=8, new failing data, now fcid=1
for id in mappings.keys():
jobs_to_classify = [] # mark as fcid=8 (known intermittent)
jobs_to_unclassify = [] # previously parked as fcid=8, new failing data, now fcid=1
for job in mappings[id]["jobs"]:
all_green = True
current_all_green = True
Expand All @@ -229,19 +263,7 @@ def check_and_mark_intermittent(job_id):
elif job_classifications[job] == 8:
jobs_to_unclassify.append(job)

# TODO: consider job.result=(busted, exception)
for job in jobs_to_classify:
target_job = Job.objects.filter(
id=job, result="testfailed", failure_classification_id__in=[1, 6]
)
if target_job:
target_job.update(failure_classification_id=8)

for job in jobs_to_unclassify:
target_job = Job.objects.filter(
id=job, result="testfailed", failure_classification_id=8
)
if target_job:
target_job.update(failure_classification_id=1)

return _check_and_mark_infra(current_job, distinct_job_ids, ids)
to_classify, to_unclassify = _check_and_mark_infra(current_job, distinct_job_ids, ids)
jobs_to_classify.extend(to_classify)
jobs_to_unclassify.extend(to_unclassify)
return classify(jobs_to_classify, jobs_to_unclassify)