Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI Timing Statistics #38598

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 6 additions & 1 deletion lib/spack/spack/binary_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import spack.util.gpg
import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
import spack.util.timer as timer
import spack.util.url as url_util
import spack.util.web as web_util
from spack.caches import misc_cache_location
Expand Down Expand Up @@ -1813,10 +1814,11 @@ def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
m.linkname = m.linkname[result.end() :]


def extract_tarball(spec, download_result, unsigned=False, force=False):
def extract_tarball(spec, download_result, unsigned=False, force=False, timer=timer.NULL_TIMER):
"""
extract binary tarball for given package into install area
"""
timer.start("extract")
if os.path.exists(spec.prefix):
if force:
shutil.rmtree(spec.prefix)
Expand Down Expand Up @@ -1896,7 +1898,9 @@ def extract_tarball(spec, download_result, unsigned=False, force=False):

os.remove(tarfile_path)
os.remove(specfile_path)
timer.stop("extract")

timer.start("relocate")
try:
relocate_package(spec)
except Exception as e:
Expand All @@ -1917,6 +1921,7 @@ def extract_tarball(spec, download_result, unsigned=False, force=False):
if os.path.exists(filename):
os.remove(filename)
_delete_staged_downloads(download_result)
timer.stop("relocate")


def _ensure_common_prefix(tar: tarfile.TarFile) -> str:
Expand Down
11 changes: 11 additions & 0 deletions lib/spack/spack/cmd/ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import spack.hash_types as ht
import spack.mirror
import spack.util.gpg as gpg_util
import spack.util.timer as timer
import spack.util.url as url_util
import spack.util.web as web_util

Expand Down Expand Up @@ -253,6 +254,8 @@ def ci_rebuild(args):
check a single spec against the remote mirror, and rebuild it from source if the mirror does
not contain the hash
"""
rebuild_timer = timer.Timer()

env = spack.cmd.require_active_env(cmd_name="ci rebuild")

# Make sure the environment is "gitlab-enabled", or else there's nothing
Expand Down Expand Up @@ -736,6 +739,14 @@ def ci_rebuild(args):

print(reproduce_msg)

rebuild_timer.stop()
try:
with open("install_timers.json", "w") as timelog:
extra_attributes = {"name": ".ci-rebuild"}
rebuild_timer.write_json(timelog, extra_attributes=extra_attributes)
except Exception as e:
tty.debug(str(e))

# Tie job success/failure to the success/failure of building the spec
return install_exit_code

Expand Down
18 changes: 14 additions & 4 deletions lib/spack/spack/installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,16 @@
STATUS_REMOVED = "removed"


def _write_timer_json(pkg, timer, cache):
extra_attributes = {"name": pkg.name, "cache": cache, "hash": pkg.spec.dag_hash()}
try:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this try really shouldn't be required, but for some reason the _install_from_cache routine doesn't actually install anything in the test_install_from_cache_ok test.

with open(pkg.times_log_path, "w") as timelog:
timer.write_json(timelog, extra_attributes=extra_attributes)
except Exception as e:
tty.debug(str(e))
return


class InstallAction:
#: Don't perform an install
NONE = 0
Expand Down Expand Up @@ -399,6 +409,8 @@ def _install_from_cache(
return False
t.stop()
tty.debug("Successfully extracted {0} from binary cache".format(pkg_id))

_write_timer_json(pkg, t, True)
_print_timer(pre=_log_prefix(pkg.name), pkg_id=pkg_id, timer=t)
_print_installed_pkg(pkg.spec.prefix)
spack.hooks.post_install(pkg.spec, explicit)
Expand Down Expand Up @@ -481,7 +493,7 @@ def _process_binary_cache_tarball(

with timer.measure("install"), spack.util.path.filter_padding():
binary_distribution.extract_tarball(
pkg.spec, download_result, unsigned=unsigned, force=False
pkg.spec, download_result, unsigned=unsigned, force=False, timer=timer
)

pkg.installed_from_binary_cache = True
Expand Down Expand Up @@ -2092,7 +2104,6 @@ def install(self) -> None:
# another process has a write lock so must be (un)installing
# the spec (or that process is hung).
ltype, lock = self._ensure_locked("read", pkg)

# Requeue the spec if we cannot get at least a read lock so we
# can check the status presumably established by another process
# -- failed, installed, or uninstalled -- on the next pass.
Expand Down Expand Up @@ -2372,8 +2383,7 @@ def run(self) -> bool:

# Stop the timer and save results
self.timer.stop()
with open(self.pkg.times_log_path, "w") as timelog:
self.timer.write_json(timelog)
_write_timer_json(self.pkg, self.timer, False)

print_install_test_log(self.pkg)
_print_timer(pre=self.pre, pkg_id=self.pkg_id, timer=self.timer)
Expand Down
4 changes: 2 additions & 2 deletions lib/spack/spack/test/util/timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ def test_timer_write():

deserialized = json.loads(json_buffer.getvalue())
assert deserialized == {
"phases": [{"name": "timer", "seconds": 1.0}],
"total": {"seconds": 3.0},
"phases": [{"name": "timer", "path": "timer", "seconds": 1.0, "count": 1}],
"total": 3.0,
}


Expand Down
111 changes: 80 additions & 31 deletions lib/spack/spack/util/timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,31 +13,32 @@
import sys
import time
from contextlib import contextmanager
from typing import Dict
from typing import Callable, Dict, List

from llnl.util.lang import pretty_seconds_formatter

import spack.util.spack_json as sjson

Interval = collections.namedtuple("Interval", ("begin", "end"))
TimerEvent = collections.namedtuple("TimerEvent", ("time", "running", "label"))
TimeTracker = collections.namedtuple("TimeTracker", ("total", "start", "count", "path"))

#: name for the global timer (used in start(), stop(), duration() without arguments)
global_timer_name = "_global"


class BaseTimer:
def start(self, name=global_timer_name):
def start(self, name=None):
pass

def stop(self, name=global_timer_name):
def stop(self, name=None):
pass

def duration(self, name=global_timer_name):
def duration(self, name=None):
return 0.0

@contextmanager
def measure(self, name):
yield
yield self

@property
def phases(self):
Expand All @@ -60,16 +61,18 @@ class NullTimer(BaseTimer):
class Timer(BaseTimer):
"""Simple interval timer"""

def __init__(self, now=time.time):
def __init__(self, now: Callable[[], float] = time.time):
"""
Arguments:
now: function that gives the seconds since e.g. epoch
"""
self._now = now
self._timers: Dict[str, Interval] = collections.OrderedDict()
self._timers: Dict[str, TimeTracker] = {}
self._timer_stack: List[str] = []

# _global is the overal timer since the instance was created
self._timers[global_timer_name] = Interval(self._now(), end=None)
self._events: List[TimerEvent] = []
# Push start event
self._events.append(TimerEvent(self._now(), True, global_timer_name))

def start(self, name=global_timer_name):
"""
Expand All @@ -79,7 +82,7 @@ def start(self, name=global_timer_name):
name (str): Optional name of the timer. When no name is passed, the
global timer is started.
"""
self._timers[name] = Interval(self._now(), None)
self._events.append(TimerEvent(self._now(), True, name))

def stop(self, name=global_timer_name):
"""
Expand All @@ -90,10 +93,7 @@ def stop(self, name=global_timer_name):
name (str): Optional name of the timer. When no name is passed, all
timers are stopped.
"""
interval = self._timers.get(name, None)
if not interval:
return
self._timers[name] = Interval(interval.begin, self._now())
self._events.append(TimerEvent(self._now(), False, name))

def duration(self, name=global_timer_name):
"""
Expand All @@ -107,13 +107,13 @@ def duration(self, name=global_timer_name):
Returns:
float: duration of timer.
"""
try:
interval = self._timers[name]
except KeyError:
self._flatten()
if name in self._timers:
if name in self._timer_stack:
return self._timers[name].total + (self._now() - self._timers[name].start)
return self._timers[name].total
else:
return 0.0
# Take either the interval end, the global timer, or now.
end = interval.end or self._timers[global_timer_name].end or self._now()
return end - interval.begin

@contextmanager
def measure(self, name):
Expand All @@ -123,23 +123,72 @@ def measure(self, name):
Arguments:
name (str): Name of the timer
"""
begin = self._now()
yield
self._timers[name] = Interval(begin, self._now())
self.start(name)
yield self
self.stop(name)

@property
def phases(self):
"""Get all named timers (excluding the global/total timer)"""
return [k for k in self._timers.keys() if k != global_timer_name]

def write_json(self, out=sys.stdout):
self._flatten()
return [k for k in self._timers.keys() if not k == global_timer_name]

def _flatten(self):
for event in self._events:
if event.running:
if event.label not in self._timer_stack:
self._timer_stack.append(event.label)
# Only start the timer if it is on top of the stack
# restart doesn't work after a subtimer is started
if event.label == self._timer_stack[-1]:
timer_path = "/".join(self._timer_stack[1:])
tracker = self._timers.get(
event.label, TimeTracker(0.0, event.time, 0, timer_path)
Copy link
Member

@haampie haampie Aug 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(let's fix this in a follow-up PR) This always allocates a TimeTracker(...), also when self._timers[event.label] exists. So, then it's better to use a standard if-else, or use defaultdict.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was not aware of deafultdict, noted for the update later to improve the code clarity.

)
assert tracker.path == timer_path
self._timers[event.label] = TimeTracker(
tracker.total, event.time, tracker.count, tracker.path
)
else: # if not event.running:
if event.label in self._timer_stack:
Copy link
Member

@haampie haampie Aug 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if this is not true? (let's fix this in a follow-up PR if necessary)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it isn't true, you are stopping a timer that was never started, so it is just a noop. Is there something you think should be done in that case?

index = self._timer_stack.index(event.label)
for label in self._timer_stack[index:]:
tracker = self._timers[label]
self._timers[label] = TimeTracker(
tracker.total + (event.time - tracker.start),
None,
tracker.count + 1,
tracker.path,
)
self._timer_stack = self._timer_stack[: max(0, index)]
Copy link
Member

@haampie haampie Aug 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(let's fix this in a follow-up PR) there's no need to use max here. In fact del self._timer_stack[index:] is what you want?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Basically I am trying to truncate all of the timer labels in the timer stack after and including "index"

# clear events
self._events = []
Comment on lines +136 to +165
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you follow up with a PR that makes this code actually readable? It's hard to see the intent, probably also for you after a couple months.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like more comments, or re-written in a more pythonic way?


def write_json(self, out=sys.stdout, extra_attributes={}):
"""Write a json object with times to file"""
phases = [{"name": p, "seconds": self.duration(p)} for p in self.phases]
times = {"phases": phases, "total": {"seconds": self.duration()}}
out.write(sjson.dump(times))
self._flatten()
data = {
"total": self._timers[global_timer_name].total,
"phases": [
{
"name": phase,
"path": self._timers[phase].path,
"seconds": self._timers[phase].total,
"count": self._timers[phase].count,
}
for phase in self.phases
],
}
if extra_attributes:
data.update(extra_attributes)
if out:
out.write(sjson.dump(data))
else:
return data

def write_tty(self, out=sys.stdout):
"""Write a human-readable summary of timings"""
"""Write a human-readable summary of timings (depth is 1)"""
self._flatten()

times = [self.duration(p) for p in self.phases]

Expand Down
6 changes: 5 additions & 1 deletion share/spack/gitlab/cloud_pipelines/configs/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ ci:
- k=$CI_GPG_KEY_ROOT/intermediate_ci_signing_key.gpg; [[ -r $k ]] && spack gpg trust $k
- k=$CI_GPG_KEY_ROOT/spack_public_key.gpg; [[ -r $k ]] && spack gpg trust $k
script::
- spack --color=always --backtrace ci rebuild --tests > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
- - spack --color=always --backtrace ci rebuild --tests > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
- - spack python ${CI_PROJECT_DIR}/share/spack/gitlab/cloud_pipelines/scripts/common/aggregate_package_logs.spack.py
kwryankrattiger marked this conversation as resolved.
Show resolved Hide resolved
--prefix /home/software/spack:${CI_PROJECT_DIR}
--log install_times.json
${SPACK_ARTIFACTS_ROOT}/user_data/install_times.json
after_script:
- - cat /proc/loadavg || true
variables:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env spack-python
"""
kwryankrattiger marked this conversation as resolved.
Show resolved Hide resolved
This script is meant to be run using:
`spack python aggregate_logs.spack.py`
"""

import os


def find_logs(prefix, filename):
for root, _, files in os.walk(prefix):
if filename in files:
yield os.path.join(root, filename)


if __name__ == "__main__":
import json
from argparse import ArgumentParser

parser = ArgumentParser("aggregate_logs")
parser.add_argument("output_file")
parser.add_argument("--log", default="install_times.json")
parser.add_argument("--prefix", required=True)

args = parser.parse_args()

prefixes = [p for p in args.prefix.split(":") if os.path.exists(p)]

# Aggregate the install timers into a single json
data = []
kwryankrattiger marked this conversation as resolved.
Show resolved Hide resolved
for prefix in prefixes:
time_logs = find_logs(prefix, args.log)
for log in time_logs:
with open(log) as fd:
data.append(json.load(fd))

with open(args.output_file, "w") as fd:
json.dump(data, fd)