Skip to content

Commit

Permalink
Merge pull request #18197 from MushuEE/feature/add_new_kettle_fields
Browse files Browse the repository at this point in the history
Extend Kettle build fields to be used for determining flakes
  • Loading branch information
k8s-ci-robot committed Jul 10, 2020
2 parents 830b617 + b13c387 commit 1b90b9c
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 42 deletions.
139 changes: 101 additions & 38 deletions kettle/make_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,78 @@

import model

SECONDS_PER_DAY = 86400

class Build:
    """
    Represent metadata and details of a build, leveraging the information in
    started.json and finished.json.
    Should conform to the schema set in TestGrid below:
    github.com/GoogleCloudPlatform/testgrid/blob/7d818/metadata/job.go#L23-L77
    """
    # pylint: disable=too-many-instance-attributes
    # Attrs represent underlying build object

    def __init__(self, path, tests):
        """Initialize identity/test fields; artifact-derived fields start as None.

        Args:
            path (str): GCS path of the build.
            tests (list[dict]): parsed junit test dicts for the build.
        """
        self.path = path
        self.test = tests
        self.tests_run = len(tests)
        self.tests_failed = sum(t.get('failed', 0) for t in tests)
        job, number = path_to_job_and_number(path)
        self.job = job
        # Normalize falsy build numbers (0/''/None) to None so as_dict drops them.
        self.number = number if number else None
        # From started.json
        self.started = None
        self.executor = None
        self.repo_commit = None
        # From finished.json
        self.finished = None
        self.result = None
        self.passed = None
        self.version = None
        # From either/combo
        self.repos = None
        self.metadata = None
        self.elapsed = None

    @classmethod
    def generate(cls, path, tests, started, finished, metadata, repos):
        """Construct and fully populate a Build from parsed job artifacts.

        Args:
            path (str): GCS path of the build.
            tests (list[dict]): parsed junit test dicts.
            started (dict or None): contents of started.json.
            finished (dict or None): contents of finished.json.
            metadata: pre-extracted metadata key/value pairs (or None).
            repos: pre-extracted repos value (or None).
        Returns:
            Build: a populated instance.
        """
        build = cls(path, tests)
        build.populate_start(started)
        build.populate_finish(finished)
        build.populate_meta(metadata, repos)
        build.set_elapsed()
        return build

    def as_dict(self):
        """Return the build's attributes as a dict, omitting unset (None) fields."""
        return {k: v for k, v in self.__dict__.items() if v is not None}

    def populate_start(self, started):
        """Fill fields sourced from started.json; no-op when started is falsy."""
        if started:
            self.started = int(started['timestamp'])
            self.executor = started.get('node')
            # Newer runs use 'repo-commit'; fall back to legacy 'repo-version'.
            self.repo_commit = started.get('repo-commit', started.get('repo-version'))
            self.repos = started.get('repos')

    def populate_finish(self, finished):
        """Fill fields sourced from finished.json; no-op when finished is falsy."""
        if finished:
            self.finished = int(finished['timestamp'])
            self.version = finished.get('version')
            if 'result' in finished:
                self.result = finished.get('result')
                self.passed = self.result == 'SUCCESS'
            elif isinstance(finished.get('passed'), bool):
                # Legacy finished.json: derive result from the boolean 'passed'.
                self.passed = finished['passed']
                self.result = 'SUCCESS' if self.passed else 'FAILURE'

    def populate_meta(self, metadata, repos):
        """Fill metadata-derived fields.

        Only overwrite self.repos when a metadata-derived value is present,
        so a repos value already taken from started.json in populate_start
        is not clobbered by None.
        """
        self.metadata = metadata
        if repos:
            self.repos = repos

    def set_elapsed(self):
        """Compute elapsed seconds when both start and finish times are known."""
        if self.started and self.finished:
            self.elapsed = self.finished - self.started

def parse_junit(xml):
"""Generate failed tests as a series of dicts. Ignore skipped tests."""
Expand Down Expand Up @@ -125,49 +197,42 @@ def path_to_job_and_number(path):


def row_for_build(path, started, finished, results):
"""
Generate an dictionary that represents a build as described by TestGrid's
job schema. See link for reference.
github.com/GoogleCloudPlatform/testgrid/blob/7d818/metadata/job.go#L23-L77
Args:
path (string): Path to file data for a build
started (dict): Values pulled from started.json for a build
finsihed (dict): Values pulled from finsihed.json for a build
results (array): List of file data that exits under path
Return:
Dict holding metadata and information pertinent to a build
to be stored in BigQuery
"""
tests = []
for result in results:
for test in parse_junit(result):
if '#' in test['name'] and not test.get('failed'):
continue # skip successful repeated tests
tests.append(test)
build = {
'path': path,
'test': tests,
'tests_run': len(tests),
'tests_failed': sum(t.get('failed', 0) for t in tests)
}
job, number = path_to_job_and_number(path)
build['job'] = job
if number:
build['number'] = number

if started:
build['started'] = int(started['timestamp'])
if 'node' in started:
build['executor'] = started['node']
if finished:
build['finished'] = int(finished['timestamp'])
if 'result' in finished:
build['result'] = finished['result']
build['passed'] = build['result'] == 'SUCCESS'
elif isinstance(finished.get('passed'), bool):
build['passed'] = finished['passed']
build['result'] = 'SUCCESS' if build['passed'] else 'FAILURE'
if 'version' in finished:
build['version'] = finished['version']

def get_metadata():
metadata = None
metapairs = None
repos = None
if finished and 'metadata' in finished:
metadata = finished['metadata']
elif started:
metadata = started.get('metadata')

if metadata:
# clean useless/duplicated metadata fields
if 'repo' in metadata and not metadata['repo']:
metadata.pop('repo')
build_version = build.get('version', 'N/A')
build_version = finished.get('version', 'N/A')
if metadata.get('job-version') == build_version:
metadata.pop('job-version')
if metadata.get('version') == build_version:
Expand All @@ -176,16 +241,14 @@ def get_metadata():
if not isinstance(value, str):
# the schema specifies a string value. force it!
metadata[key] = json.dumps(value)
if not metadata:
return None
return [{'key': k, 'value': v} for k, v in sorted(metadata.items())]
if key == 'repos':
repos = metadata[key]
metapairs = [{'key': k, 'value': v} for k, v in sorted(metadata.items())]
return metapairs, repos

metadata = get_metadata()
if metadata:
build['metadata'] = metadata
if started and finished:
build['elapsed'] = build['finished'] - build['started']
return build
metadata, repos = get_metadata()
build = Build.generate(path, tests, started, finished, metadata, repos)
return build.as_dict()


def get_table(days):
Expand Down Expand Up @@ -219,14 +282,14 @@ def make_rows(db, builds):


def main(db, opts, outfile):
min_started = None
min_started = 0
if opts.days:
min_started = time.time() - (opts.days or 1) * 24 * 60 * 60
min_started = time.time() - (opts.days or 1) * SECONDS_PER_DAY
incremental_table = get_table(opts.days)

if opts.assert_oldest:
oldest = db.get_oldest_emitted(incremental_table)
if oldest < time.time() - opts.assert_oldest * 24 * 60 * 60:
if oldest < time.time() - opts.assert_oldest * SECONDS_PER_DAY:
return 1
return 0

Expand Down
16 changes: 12 additions & 4 deletions kettle/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,22 @@ def _init_incremental(self, table):
@staticmethod
def _get_builds(results):
for rowid, path, started, finished in results:
started = started and json.loads(started)
finished = finished and json.loads(finished)
started = json.loads(started) if started else started
finished = json.loads(finished) if finished else finished
yield rowid, path, started, finished

def get_builds(self, path='', min_started=None, incremental_table=DEFAULT_INCREMENTAL_TABLE):
def get_builds(self, path='', min_started=0, incremental_table=DEFAULT_INCREMENTAL_TABLE):
"""
Iterate through (buildid, gcs_path, started, finished) for each build under
the given path that has not already been emitted.
Args:
path (string, optional): build path to fetch
min_started (int, optional): epoch time to fetch builds since
incremental_table (string, optional): table name
Returns:
Generator containing rowID, path, and dicts representing the started and finished json
"""
self._init_incremental(incremental_table)
results = self.db.execute(
Expand All @@ -135,7 +143,7 @@ def get_builds(self, path='', min_started=None, incremental_table=DEFAULT_INCREM
' and finished_time >= ?' +
' and rowid not in (select build_id from %s)'
' order by finished_time' % incremental_table
, (path + '%', min_started or 0)).fetchall()
, (path + '%', min_started)).fetchall()
return self._get_builds(results)

def get_builds_from_paths(self, paths, incremental_table=DEFAULT_INCREMENTAL_TABLE):
Expand Down

0 comments on commit 1b90b9c

Please sign in to comment.