Skip to content

Commit

Permalink
Use global_timer to work out how to speed up loading remakefile.
Browse files Browse the repository at this point in the history
N.B. with no finalize.
  • Loading branch information
markmuetz committed Sep 27, 2023
1 parent 67f70a0 commit 3f34cf7
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 17 deletions.
41 changes: 38 additions & 3 deletions remake/global_timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@

class GlobalTimer:
global_timers = {}
active_timer = None

def __init__(self, name):
self.name = name
self.name = str(name)
self.timers = defaultdict(list)
self.last_time = None
self.curr_key = None
self.sub_timers = []
self.parent_timer = None

def __call__(self, key):
time = pd.Timestamp.now()
Expand All @@ -29,10 +32,42 @@ def __str__(self):
k1, k2 = k
timers = self.timers[k]
times_ms = [t.microseconds for t in timers]
time_total_ms = np.sum(times_ms)
time_mean_ms = np.mean(times_ms)
time_std_ms = np.std(times_ms)
output.append((f'{k1} -> {k2}', f'{time_mean_ms / 1e6:.2g}s', f'(+/- {time_std_ms / 1e6:.2g}s)'))
return f'{self.name}\n' + tabulate(output, headers=('tx', 'mean', 'std'))
output.append((f'{k1} -> {k2}', f'{time_total_ms / 1e6:.2g}s', f'{time_mean_ms / 1e6:.2g}s', f'(+/- {time_std_ms / 1e6:.2g}s)'))
return f'{self.name}\n' + '=' * len(self.name) + '\n' + tabulate(output, headers=('tx', 'total', 'mean', 'std'))

def start(self):
    """Make this timer the active one and record a ``'__start__'`` event.

    Work in progress: the method raises before doing anything, so the
    nesting logic below the ``raise`` is a draft that never executes.

    Raises:
        NotImplementedError: always, until the TODO below is resolved.
    """
    # TODO: finish and enable the draft implementation below.
    # ``NotImplemented`` is the sentinel returned by rich-comparison methods,
    # not an exception class; raising it fails with an unrelated ``TypeError``.
    raise NotImplementedError('GlobalTimer.start is not implemented yet')
    # --- Unreachable draft, kept verbatim for when the TODO is resolved. ---
    # NOTE(review): block nesting was reconstructed from an unindented diff;
    # confirm against the repository before enabling.
    if not GlobalTimer.active_timer:
        GlobalTimer.active_timer = self
    else:
        if self not in GlobalTimer.active_timer.sub_timers:
            GlobalTimer.active_timer.sub_timers.append(self)
        self.parent_timer = GlobalTimer.active_timer
        GlobalTimer.active_timer = self
    self('__start__')

def end(self):
    """Hand the active-timer slot back and record an ``'__end__'`` event.

    Work in progress: the method raises before doing anything, so the
    logic below the ``raise`` is a draft that never executes.

    Raises:
        NotImplementedError: always, until the TODO below is resolved.
    """
    # TODO: finish and enable the draft implementation below.
    # ``NotImplemented`` is the sentinel returned by rich-comparison methods,
    # not an exception class; raising it fails with an unrelated ``TypeError``.
    raise NotImplementedError('GlobalTimer.end is not implemented yet')
    # --- Unreachable draft, kept verbatim for when the TODO is resolved. ---
    # NOTE(review): block nesting was reconstructed from an unindented diff;
    # confirm against the repository before enabling.
    if GlobalTimer.active_timer == self:
        if self.parent_timer:
            GlobalTimer.active_timer = self.parent_timer
            self.parent_timer = None
        else:
            GlobalTimer.active_timer = None
    else:
        raise Exception('Did you forget to start or end a timer?')
    self('__end__')


def reset(self):
    """Return ``timers``, ``last_time`` and ``curr_key`` to their initial values.

    ``sub_timers`` and ``parent_timer`` are left untouched.
    """
    # Forget the most recent key/timestamp, then swap in an empty per-key store.
    self.curr_key = None
    self.last_time = None
    self.timers = defaultdict(list)


def get_global_timer(name):
Expand Down
6 changes: 4 additions & 2 deletions remake/special_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ class SpecialPaths:
>>> demo = Remake(special_paths=special_paths)
"""
def __init__(self, **paths):
if 'CWD' not in paths:
paths['CWD'] = Path.cwd()
# if 'CWD' not in paths:
# paths['CWD'] = Path.cwd()
for k, v in paths.items():
assert isinstance(k, str), f'{k} not a string'
assert isinstance(v, Path) or isinstance(v, str), f'{v} not a Path or string'
Expand All @@ -53,6 +53,8 @@ def map_special_paths(special_paths, paths):
>>> map_special_paths(special_paths, {'path1': Path('/A/data/path')})
{'path1': PosixPath('DATA/path')}
"""
if not special_paths.paths:
return paths
mapped_paths = {}
for path_name, path in paths.items():
mapped_path = None
Expand Down
18 changes: 15 additions & 3 deletions remake/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from remake.setup_logging import add_file_logging, remove_file_logging
from remake.special_paths import map_special_paths
from remake.remake_exceptions import FileNotCreated
from remake.global_timer import get_global_timer

logger = getLogger(__name__)

Expand Down Expand Up @@ -51,6 +52,8 @@ class Task(BaseTask):

def __init__(self, task_ctrl, func, inputs, outputs,
*, force=False, depends_on=tuple()):
task_init_timer = get_global_timer(str(self.__class__) + '__init__')
task_init_timer(0)
super().__init__(task_ctrl)
# self.remake_on = True
self.depends_on_sources = []
Expand All @@ -70,9 +73,11 @@ def __init__(self, task_ctrl, func, inputs, outputs,
Task.task_func_cache[depend_obj] = depend_func_source

self.depends_on = depends_on
task_init_timer(1)

if not callable(func):
raise ValueError(f'{func} is not callable')
task_init_timer(2)

self.func = func
if self.func in Task.task_func_cache:
Expand Down Expand Up @@ -106,16 +111,23 @@ def __init__(self, task_ctrl, func, inputs, outputs,

if not outputs:
raise Exception('outputs must be set')

self.inputs = {k: Path(v).absolute() for k, v in inputs.items()}
self.outputs = {k: Path(v).absolute() for k, v in outputs.items()}
task_init_timer(3)

task_init_timer(3.1)
#self.inputs = {k: Path(v).absolute() for k, v in inputs.items()}
#self.outputs = {k: Path(v).absolute() for k, v in outputs.items()}
self.inputs = inputs
self.outputs = outputs
task_init_timer(3.2)
self.special_inputs = map_special_paths(self.task_ctrl.special_paths, self.inputs)
self.special_outputs = map_special_paths(self.task_ctrl.special_paths, self.outputs)
task_init_timer(3.3)
self.result = None
self.rerun_on_mtime = True
self.tmp_outputs = {}
self.logger = None
self._path_hash_key = None
task_init_timer(4)

def __repr__(self):
return str(self)
Expand Down
38 changes: 29 additions & 9 deletions remake/task_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from remake.remake_base import Remake
from remake.task_query_set import TaskQuerySet
from remake.util import format_path
from remake.global_timer import get_global_timer

logger = getLogger(__name__)

Expand Down Expand Up @@ -63,22 +64,31 @@ def __new__(mcs, clsname, bases, attrs):
var_matrix = attrs.get('var_matrix', None)
depends_on.extend(attrs.get('depends_on', []))
logger.debug(f' depends on: {depends_on}')
loop_timer = get_global_timer(str(clsname))

create_inputs_fn = RemakeMetaclass._get_create_inputs_ouputs_fn(attrs['rule_inputs'])
create_outputs_fn = RemakeMetaclass._get_create_inputs_ouputs_fn(attrs['rule_outputs'])

if var_matrix:
all_loop_vars = list(itertools.product(*var_matrix.values()))
logger.debug(f' creating {len(all_loop_vars)} instances of {clsname}')

for loop_vars in all_loop_vars:
loop_timer(0)
# e.g. var_matrix = {'a': [1, 2], 'b': [3, 4]}
# run for [(1, 3), (1, 4), (2, 3), (2, 4)].
fmt_dict = {k: v for k, v in zip(var_matrix.keys(), loop_vars)}
fmt_dict = RemakeMetaclass._check_modify_fmt_dict(fmt_dict)
loop_timer(1)
# e.g. for (1, 3): fmt_dict = {'a': 1, 'b': 3}
inputs = RemakeMetaclass._create_inputs_ouputs(attrs['rule_inputs'], fmt_dict)
outputs = RemakeMetaclass._create_inputs_ouputs(attrs['rule_outputs'], fmt_dict)
inputs = create_inputs_fn(**fmt_dict)
outputs = create_outputs_fn(**fmt_dict)
# Creates an instance of the class. N.B. TaskRule inherits from Task, so Task.__init__ is
# called here.
loop_timer(2)
task = newcls(remake.task_ctrl, attrs['rule_run'], inputs, outputs,
depends_on=depends_on)
loop_timer(3)
# Set up the instance variables so that e.g. within TaskRule.rule_run, self.a == 1.
for k, v in zip(var_matrix.keys(), loop_vars):
if isinstance(k, tuple):
Expand All @@ -88,12 +98,20 @@ def __new__(mcs, clsname, bases, attrs):
setattr(task, kk, vv)
else:
setattr(task, k, v)
loop_timer(4)
newcls.tasks.append(task)
remake.task_ctrl.add(task)
loop_timer(5)
print(loop_timer)
task_init_timer = get_global_timer(str(newcls) + '__init__')
print(task_init_timer)
cond_input_timer = get_global_timer('cond_input_timer')
print(cond_input_timer)
cond_input_timer.reset()
else:
logger.debug(f' creating instance of {clsname}')
inputs = RemakeMetaclass._create_inputs_ouputs(attrs['rule_inputs'], {})
outputs = RemakeMetaclass._create_inputs_ouputs(attrs['rule_outputs'], {})
inputs = create_inputs_fn(**{})
outputs = create_outputs_fn(**{})
task = newcls(remake.task_ctrl, attrs['rule_run'],
inputs, outputs,
depends_on=depends_on)
Expand Down Expand Up @@ -129,18 +147,20 @@ def _check_modify_fmt_dict(fmt_dict):
return new_fmt_dict

@staticmethod
def _create_inputs_ouputs(rule_inputs_outputs, fmt_dict):
def _get_create_inputs_ouputs_fn(rule_inputs_outputs):
# This is a little gnarly.
# See: https://stackoverflow.com/questions/41921255/staticmethod-object-is-not-callable
# Method has not been bound yet, but you can call it using its __func__ attr.
# N.B. both are possible, if e.g. a second rule uses a first rule's method.
if hasattr(rule_inputs_outputs, '__func__'):
return rule_inputs_outputs.__func__(**fmt_dict)
return rule_inputs_outputs.__func__
elif callable(rule_inputs_outputs):
return rule_inputs_outputs(**fmt_dict)
return rule_inputs_outputs
else:
return {k.format(**fmt_dict): format_path(v, **fmt_dict)
for k, v in rule_inputs_outputs.items()}
def fn(**fmt_dict):
return {k.format(**fmt_dict): format_path(v, **fmt_dict)
for k, v in rule_inputs_outputs.items()}
return fn


class TaskRule(Task, metaclass=RemakeMetaclass):
Expand Down

0 comments on commit 3f34cf7

Please sign in to comment.