Skip to content

Commit

Permalink
Make global_timer a bit better by e.g. letting you add a description.
Browse files Browse the repository at this point in the history
  • Loading branch information
markmuetz committed Feb 21, 2024
1 parent 78d14f9 commit eb81ae3
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 66 deletions.
104 changes: 64 additions & 40 deletions remake/global_timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np
import pandas as pd
from tabulate import tabulate
from tabulate import tabulate, SEPARATING_LINE


class GlobalTimer:
Expand All @@ -14,67 +14,91 @@ def __init__(self, name):
self.timers = defaultdict(list)
self.last_time = None
self.curr_key = None
self.sub_timers = []
self.parent_timer = None
self.curr_level = 1
self.last_time_at_level_stack = []
self.descriptions = {}
self.internal_time = []

def __call__(self, key, description=''):
call_time = pd.Timestamp.now()
GlobalTimer.active_timer = self
level = len(str(key).split('.'))
combined_level = max(len(str(k).split('.')) for k in [self.curr_key, key])

def __call__(self, key):
time = pd.Timestamp.now()
if self.curr_key is not None:
self.timers[(self.curr_key, key)].append(time - self.last_time)
self.timers[(self.curr_key, key, combined_level)].append(time - self.last_time)
self.last_time = time

self.descriptions[(key, combined_level)] = description
if level > self.curr_level:
self.last_time_at_level_stack.append((self.curr_key, self.last_time))
elif level < self.curr_level:
last_key, last_time_at_level = self.last_time_at_level_stack.pop()
if self.curr_key is not None:
self.timers[(last_key, key, level)].append(time - last_time_at_level)

self.curr_level = level
self.curr_key = key
self.last_time = pd.Timestamp.now()
self.internal_time.append(pd.Timestamp.now() - call_time)

def __str__(self):
def output(self, max_level=None):
output = []
def timer_sorter(item):
k1, k2, level = item[0]
return str(k1), level

for k in self.timers.keys():
k1, k2 = k
timers = self.timers[k]
# print(timers)
sorted_timers = dict(sorted(self.timers.items(), key=timer_sorter))
total_time = 0
for key in sorted_timers.keys():
k1, k2, combined_level = key
if combined_level > 1:
continue
timers = self.timers[key]
times = [t.total_seconds() for t in timers]
total_time += np.sum(times)

for key in sorted_timers.keys():
k1, k2, combined_level = key
if max_level and combined_level > max_level:
continue
level = len(str(k1).split('.'))
description = self.descriptions.get((k1, level), '')
indent = (combined_level - 1) * '--'

timers = self.timers[key]
times = [t.total_seconds() for t in timers]
# print(times)
time_total = np.sum(times)
time_mean = np.mean(times)
time_std = np.std(times)
percent = time_total / total_time * 100
count = len(times)
output.append((f'{k1} -> {k2}', f'{time_total:.2g}s', f'{time_mean:.2g}s', f'(+/- {time_std:.2g}s)', f'{count}'))
return f'{self.name}\n' + '=' * len(self.name) + '\n' + tabulate(output, headers=('tx', 'total', 'mean', 'std', 'count'))
output.append((f'{indent}{k1} -> {k2}', f'{time_total:.2g}s', f'{percent:04.1f}%', f'{time_mean:.2g}s', f'(+/- {time_std:.2g}s)', f'{count}', description))

def start(self):
# TODO:
raise NotImplemented
if not GlobalTimer.active_timer:
GlobalTimer.active_timer = self
else:
if self not in GlobalTimer.active_timer.sub_timers:
GlobalTimer.active_timer.sub_timers.append(self)
self.parent_timer = GlobalTimer.active_timer
GlobalTimer.active_timer = self
self('__start__')

def end(self):
# TODO:
raise NotImplemented
if GlobalTimer.active_timer == self:
if self.parent_timer:
GlobalTimer.active_timer = self.parent_timer
self.parent_timer = None
else:
GlobalTimer.active_timer = None
else:
raise Exception('Did you forget to start or end a timer?')
self('__end__')
internal_times = [t.total_seconds() for t in self.internal_time]
output.append(SEPARATING_LINE)
output.append(('total', f'{total_time:.2g}s', '', '', '', '', ''))
output.append(SEPARATING_LINE)
output.append(('internal', f'{np.sum(internal_times):.2g}s', '', '', '', '', ''))
return f'{self.name}\n' + '=' * len(self.name) + '\n' + tabulate(output, headers=('tx', 'total', 'percent', 'mean', 'std', 'count', 'desc'))

def __str__(self):
return self.output()

def reset(self):
self.timers = defaultdict(list)
self.last_time = None
self.curr_key = None


def get_global_timer(name):
if name in GlobalTimer.global_timers:
def get_global_timer(name=None):
if name is None:
if not GlobalTimer.active_timer:
global_timer = GlobalTimer('root')
GlobalTimer.global_timers['root'] = global_timer
GlobalTimer.active_timer = global_timer
return GlobalTimer.active_timer
elif name in GlobalTimer.global_timers:
return GlobalTimer.global_timers[name]
else:
global_timer = GlobalTimer(name)
Expand Down
16 changes: 11 additions & 5 deletions remake/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def __init__(self, task_control_name, dotremake_dir, content_checks, track_files
self.task_metadata_map = {}

def create_task_metadata(self, task):
create_task_metadata_timer = get_global_timer('create_task_metadata_timer')
create_task_metadata_timer('4.4.0')
timer = get_global_timer()
timer('4.4.0')

task_inputs_metadata_map = {}
task_outputs_metadata_map = {}
Expand All @@ -71,20 +71,21 @@ def create_task_metadata(self, task):
else:
input_md = self.path_metadata_map[input_path]
task_inputs_metadata_map[input_path] = input_md
create_task_metadata_timer('4.4.1')
timer('4.4.1')

for output_path, special_output_path in zip(task.outputs.values(), task.special_outputs.values()):
if output_path not in self.path_metadata_map:
output_md = self._create_path_metadata(output_path, special_output_path)
else:
output_md = self.path_metadata_map[output_path]
task_outputs_metadata_map[output_path] = output_md
create_task_metadata_timer('4.4.2')
timer('4.4.2')
timer('4.4.3', 'create TaskMetadata instance')
task_md = TaskMetadata(self.task_control_name, self.dotremake_dir,
task, task_inputs_metadata_map, task_outputs_metadata_map,
self.content_checks)
self.task_metadata_map[task] = task_md
create_task_metadata_timer('4.4.3')
timer('4.4.4')
return task_md

def check_task_status(self, task):
Expand Down Expand Up @@ -119,6 +120,8 @@ def _create_path_metadata(self, path, special_input_path):
class TaskMetadata:
def __init__(self, task_control_name, dotremake_dir, task,
inputs_metadata_map, outputs_metadata_map, content_checks, track_files=False):
timer = get_global_timer()
timer('4.4.3.0')
self.task_control_name = task_control_name
self.dotremake_dir = dotremake_dir
self.metadata_dir = dotremake_dir / METADATA_VERSION
Expand All @@ -128,8 +131,10 @@ def __init__(self, task_control_name, dotremake_dir, task,
self.content_checks = content_checks
self.track_files = track_files

timer('4.4.3.1')
self.task_path_hash_key = self.task.path_hash_key()

timer('4.4.3.2')
self.metadata = {}
self.new_metadata = {'task_control_name': task_control_name}
self.requires_rerun = True
Expand All @@ -147,6 +152,7 @@ def __init__(self, task_control_name, dotremake_dir, task,

self.task_status_path = self.task_status_dir / 'task.status'
self.log_path = self.task_status_dir / 'task.log'
timer('4.4.3.3')

def update_status(self, status):
self.task_status_path.parent.mkdir(parents=True, exist_ok=True)
Expand Down
15 changes: 11 additions & 4 deletions remake/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class Task(BaseTask):

def __init__(self, task_ctrl, func, inputs, outputs,
*, force=False, depends_on=tuple()):
task_init_timer = get_global_timer(str(self.__class__) + '__init__')
task_init_timer = get_global_timer()
task_init_timer('2.0')
super().__init__(task_ctrl)
# self.remake_on = True
Expand All @@ -77,7 +77,7 @@ def __init__(self, task_ctrl, func, inputs, outputs,

if not callable(func):
raise ValueError(f'{func} is not callable')
task_init_timer('2.2')
task_init_timer('2.2', 'get func source')

self.func = func
if self.func in Task.task_func_cache:
Expand Down Expand Up @@ -172,13 +172,20 @@ def complete(self):
return True

def path_hash_key(self):
timer = get_global_timer()
timer('4.4.3.1.0')
if not self._path_hash_key:
timer('4.4.3.1.1')
h = sha1(self.func.__code__.co_name.encode())
for input_path in self.special_inputs.values():
timer('4.4.3.1.2')
for input_path in self.inputs.values():
h.update(str(input_path).encode())
for output_path in self.special_outputs.values():
timer('4.4.3.1.3')
for output_path in self.outputs.values():
h.update(str(output_path).encode())
timer('4.4.3.1.4')
self._path_hash_key = h.hexdigest()
timer('4.4.3.1.5')
return self._path_hash_key

def run_task_rule(self, force=False):
Expand Down
4 changes: 2 additions & 2 deletions remake/task_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def remaining_tasks(self):

@check_finalized(False)
def add(self, task):
task_ctrl_add_timer = get_global_timer('task_ctrl_add')
task_ctrl_add_timer = get_global_timer()
task_ctrl_add_timer(4.0)

for output in task.outputs.values():
Expand Down Expand Up @@ -393,7 +393,7 @@ def finalize(self):
return self

def _assign_tasks(self):
task_ctrl_assign_task_timer = get_global_timer('task_ctrl_assign_task_timer')
task_ctrl_assign_task_timer = get_global_timer()
# Assign each task to one of four groups:
# cannot_run: not possible to run task (missing inputs).
# completed: task has been run and does not need to be rerun.
Expand Down
18 changes: 3 additions & 15 deletions remake/task_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __new__(mcs, clsname, bases, attrs):
outputs = create_outputs_fn(**fmt_dict)
# Creates an instance of the class. N.B. TaskRule inherits from Task, so Task.__init__ is
# called here.
loop_timer(2)
loop_timer(2, 'create task instance')
task = newcls(remake.task_ctrl, attrs['rule_run'], inputs, outputs,
depends_on=depends_on)
loop_timer(3)
Expand All @@ -99,24 +99,12 @@ def __new__(mcs, clsname, bases, attrs):
setattr(task, kk, vv)
else:
setattr(task, k, v)
loop_timer(4)
loop_timer(4, 'append task')
newcls.tasks.append(task)
remake.task_ctrl.add(task)
loop_timer(5)

logger.debug('\n' + str(loop_timer))

task_ctrl_add_timer = get_global_timer('task_ctrl_add')
logger.debug('\n' + str(task_ctrl_add_timer))
task_ctrl_add_timer.reset()

create_task_metadata_timer = get_global_timer('create_task_metadata_timer')
logger.debug('\n' + str(create_task_metadata_timer))
create_task_metadata_timer.reset()

PathMetadata_timer = get_global_timer('PathMetadata_timer')
logger.debug('\n' + str(PathMetadata_timer))
PathMetadata_timer.reset()
logger.debug('\n' + loop_timer.output())
else:
logger.debug(f' creating instance of {clsname}')
inputs = create_inputs_fn(**{})
Expand Down

0 comments on commit eb81ae3

Please sign in to comment.