Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend Kettle build fields to be used for determining flakes #18197

Merged
merged 7 commits into from
Jul 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
139 changes: 101 additions & 38 deletions kettle/make_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,78 @@

import model

# Seconds in one day (24 * 60 * 60); used to convert day-count CLI
# options (e.g. --days) into epoch-time offsets.
SECONDS_PER_DAY = 86400

class Build:
    """
    Represent metadata and details of a build, leveraging the information
    in started.json and finished.json.

    Should conform to the schema set in TestGrid below:
    github.com/GoogleCloudPlatform/testgrid/blob/7d818/metadata/job.go#L23-L77
    """
    # pylint: disable=too-many-instance-attributes
    # Attrs represent underlying build object

    def __init__(self, path, tests):
        self.path = path
        self.test = tests  # schema field name is singular 'test'
        self.tests_run = len(tests)
        self.tests_failed = sum(t.get('failed', 0) for t in tests)
        job, number = path_to_job_and_number(path)
        self.job = job
        # Normalize falsy build numbers to None so as_dict() drops them.
        self.number = number if number else None
        # From started.json
        self.started = None
        self.executor = None
        self.repo_commit = None
        # From finished.json
        self.finished = None
        self.result = None
        self.passed = None
        self.version = None
        # From either / combination of the two
        self.repos = None
        self.metadata = None
        self.elapsed = None

    @classmethod
    def generate(cls, path, tests, started, finished, metadata, repos):
        """Alternate constructor: create and fully populate a Build.

        Args:
            path (str): GCS path to the build's data.
            tests (list of dict): parsed junit test results.
            started (dict or None): contents of started.json, if present.
            finished (dict or None): contents of finished.json, if present.
            metadata (list or None): cleaned metadata key/value pairs.
            repos (dict or None): repos extracted from metadata, if any.

        Returns:
            Build: populated instance.
        """
        build = cls(path, tests)
        build.populate_start(started)
        build.populate_finish(finished)
        build.populate_meta(metadata, repos)
        build.set_elapsed()
        return build

    def as_dict(self):
        """Return only the populated (non-None) attributes as a dict."""
        return {k: v for k, v in self.__dict__.items() if v is not None}

    def populate_start(self, started):
        """Fill fields sourced from started.json; no-op when missing."""
        if started:
            self.started = int(started['timestamp'])
            self.executor = started.get('node')
            # Newer jobs use 'repo-commit'; fall back to legacy 'repo-version'.
            self.repo_commit = started.get('repo-commit', started.get('repo-version'))
            self.repos = started.get('repos')

    def populate_finish(self, finished):
        """Fill fields sourced from finished.json; no-op when missing."""
        if finished:
            self.finished = int(finished['timestamp'])
            self.version = finished.get('version')
            if 'result' in finished:
                self.result = finished.get('result')
                self.passed = self.result == 'SUCCESS'
            elif isinstance(finished.get('passed'), bool):
                # Legacy jobs report only a boolean 'passed'; derive result.
                self.passed = finished['passed']
                self.result = 'SUCCESS' if self.passed else 'FAILURE'

    def populate_meta(self, metadata, repos):
        """Fill metadata-derived fields.

        Only overwrite repos when one was actually extracted from
        metadata, so a value previously populated from started.json
        is not clobbered with None.
        """
        self.metadata = metadata
        if repos:
            self.repos = repos

    def set_elapsed(self):
        """Compute elapsed seconds when both timestamps are present."""
        if self.started and self.finished:
            self.elapsed = self.finished - self.started


def parse_junit(xml):
"""Generate failed tests as a series of dicts. Ignore skipped tests."""
Expand Down Expand Up @@ -125,49 +197,42 @@ def path_to_job_and_number(path):


def row_for_build(path, started, finished, results):
"""
    Generate a dictionary that represents a build as described by TestGrid's
job schema. See link for reference.
github.com/GoogleCloudPlatform/testgrid/blob/7d818/metadata/job.go#L23-L77

Args:
path (string): Path to file data for a build
started (dict): Values pulled from started.json for a build
    finished (dict): Values pulled from finished.json for a build
results (array): List of file data that exits under path

Return:
Dict holding metadata and information pertinent to a build
to be stored in BigQuery
"""
tests = []
for result in results:
for test in parse_junit(result):
if '#' in test['name'] and not test.get('failed'):
continue # skip successful repeated tests
tests.append(test)
build = {
'path': path,
'test': tests,
'tests_run': len(tests),
'tests_failed': sum(t.get('failed', 0) for t in tests)
}
job, number = path_to_job_and_number(path)
build['job'] = job
if number:
build['number'] = number

if started:
build['started'] = int(started['timestamp'])
if 'node' in started:
build['executor'] = started['node']
if finished:
build['finished'] = int(finished['timestamp'])
if 'result' in finished:
build['result'] = finished['result']
build['passed'] = build['result'] == 'SUCCESS'
elif isinstance(finished.get('passed'), bool):
build['passed'] = finished['passed']
build['result'] = 'SUCCESS' if build['passed'] else 'FAILURE'
if 'version' in finished:
build['version'] = finished['version']

def get_metadata():
metadata = None
metapairs = None
repos = None
if finished and 'metadata' in finished:
metadata = finished['metadata']
elif started:
metadata = started.get('metadata')

if metadata:
# clean useless/duplicated metadata fields
if 'repo' in metadata and not metadata['repo']:
metadata.pop('repo')
build_version = build.get('version', 'N/A')
build_version = finished.get('version', 'N/A')
if metadata.get('job-version') == build_version:
metadata.pop('job-version')
if metadata.get('version') == build_version:
Expand All @@ -176,16 +241,14 @@ def get_metadata():
if not isinstance(value, str):
# the schema specifies a string value. force it!
metadata[key] = json.dumps(value)
if not metadata:
return None
return [{'key': k, 'value': v} for k, v in sorted(metadata.items())]
if key == 'repos':
repos = metadata[key]
metapairs = [{'key': k, 'value': v} for k, v in sorted(metadata.items())]
return metapairs, repos

metadata = get_metadata()
if metadata:
build['metadata'] = metadata
if started and finished:
build['elapsed'] = build['finished'] - build['started']
return build
metadata, repos = get_metadata()
build = Build.generate(path, tests, started, finished, metadata, repos)
return build.as_dict()


def get_table(days):
Expand Down Expand Up @@ -219,14 +282,14 @@ def make_rows(db, builds):


def main(db, opts, outfile):
min_started = None
min_started = 0
if opts.days:
min_started = time.time() - (opts.days or 1) * 24 * 60 * 60
min_started = time.time() - (opts.days or 1) * SECONDS_PER_DAY
incremental_table = get_table(opts.days)

if opts.assert_oldest:
oldest = db.get_oldest_emitted(incremental_table)
if oldest < time.time() - opts.assert_oldest * 24 * 60 * 60:
if oldest < time.time() - opts.assert_oldest * SECONDS_PER_DAY:
return 1
return 0

Expand Down
16 changes: 12 additions & 4 deletions kettle/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,22 @@ def _init_incremental(self, table):
@staticmethod
def _get_builds(results):
for rowid, path, started, finished in results:
started = started and json.loads(started)
finished = finished and json.loads(finished)
started = json.loads(started) if started else started
finished = json.loads(finished) if finished else finished
yield rowid, path, started, finished

def get_builds(self, path='', min_started=None, incremental_table=DEFAULT_INCREMENTAL_TABLE):
def get_builds(self, path='', min_started=0, incremental_table=DEFAULT_INCREMENTAL_TABLE):
"""
Iterate through (buildid, gcs_path, started, finished) for each build under
the given path that has not already been emitted.

Args:
path (string, optional): build path to fetch
min_started (int, optional): epoch time to fetch builds since
incremental_table (string, optional): table name

Returns:
Generator containing rowID, path, and dicts representing the started and finished json
"""
self._init_incremental(incremental_table)
results = self.db.execute(
Expand All @@ -135,7 +143,7 @@ def get_builds(self, path='', min_started=None, incremental_table=DEFAULT_INCREM
' and finished_time >= ?' +
' and rowid not in (select build_id from %s)'
' order by finished_time' % incremental_table
, (path + '%', min_started or 0)).fetchall()
, (path + '%', min_started)).fetchall()
return self._get_builds(results)

def get_builds_from_paths(self, paths, incremental_table=DEFAULT_INCREMENTAL_TABLE):
Expand Down