Skip to content

Commit

Permalink
Merge pull request #735 from mv1388/show_registered_callbacks
Browse files Browse the repository at this point in the history
Print the list of registered callbacks at the start of the training
  • Loading branch information
mv1388 committed Aug 13, 2022
2 parents 7ad2e70 + ee846d6 commit 6e10ad7
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(self, train_loop_obj):
self.cbs_on_after_batch_prediction
]

def register_callbacks(self, callbacks, cache_callbacks=False):
def register_callbacks(self, callbacks, cache_callbacks=False, print_callbacks=False):
"""Register TrainLoop object reference inside the listed callbacks when the TrainLoop is created
Normally, this is called from inside the train loop by the TrainLoop itself. Basically train loop "registers"
Expand All @@ -56,6 +56,8 @@ def register_callbacks(self, callbacks, cache_callbacks=False):
cache_callbacks (bool): should the provided callbacks be cached and not yet registered. First subsequent
time this method is called without ``cache_callbacks`` enabled all the previously cached callbacks
are added and also registered with the current list of callbacks.
print_callbacks (bool): after registering the provided callbacks also print the list of registered callbacks
which will be executed during the run of the train loop
Returns:
None
Expand Down Expand Up @@ -87,6 +89,9 @@ def register_callbacks(self, callbacks, cache_callbacks=False):
# time this prevents their duplication in the execution-position-split self.registered_cbs.
self.split_on_execution_position(callbacks, register_train_loop=False)

if print_callbacks:
self.print_registered_callback_names()

def should_enable_callback(self, callback):
"""Determine if callback should be enabled and executed to be in accordance with the GPU device setting
Expand Down Expand Up @@ -250,6 +255,8 @@ def print_callback_info(callback_list):
for callback in callback_list])

def print_registered_callback_names(self):
    """Print the names of all callbacks currently registered with this handler.

    When running in DDP (distributed) training mode, first print a header
    identifying the device/process producing the output, so that the
    interleaved per-process callback listings can be told apart in the logs.

    Returns:
        None
    """
    if self.train_loop_obj.ddp_training_mode:
        # Tag the output with this process's CUDA device so DDP log lines are attributable.
        print(f'*** On device {self.train_loop_obj.device.index} ({self.train_loop_obj.device}) ***')
    # NOTE(review): relies on this handler's __str__ (defined elsewhere in the
    # class, not visible here) to format the registered callback names.
    print(self)

def __len__(self):
Expand Down
7 changes: 5 additions & 2 deletions aitoolbox/torchtrain/train_loop/train_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self, model,
train_loader, validation_loader, test_loader,
optimizer, criterion,
collate_batch_pred_fn=append_predictions, pred_transform_fn=torch_cat_transf,
end_auto_eval=True, lazy_experiment_save=False,
end_auto_eval=True, lazy_experiment_save=False, print_callbacks=False,
gpu_mode='single', cuda_device_idx=None, use_amp=False):
"""Core PyTorch TrainLoop supporting the model training and target prediction
Expand All @@ -58,6 +58,8 @@ def __init__(self, model,
lazy_experiment_save (bool): when in lazy mode experiment tracking components will create the experiment
folder only after some training results are available (possibly at the end of the first epoch) instead
of at the beginning of training.
print_callbacks (bool): at the start of training print the list of registered callbacks
which will be executed during the run of the train loop
gpu_mode (str): GPU training mode selection. TrainLoop supports different GPU training modes by
specifying one of the following:
Expand Down Expand Up @@ -92,6 +94,7 @@ def __init__(self, model,
self.pred_transform_fn = pred_transform_fn
self.end_auto_eval = end_auto_eval
self.lazy_experiment_save = lazy_experiment_save
self.print_callbacks = print_callbacks

self.num_optimizers = 1 if not isinstance(self.optimizer, MultiOptimizer) else len(self.optimizer)

Expand Down Expand Up @@ -210,7 +213,7 @@ def _train(self, num_epochs, num_iterations, callbacks=None, grad_accumulation=1
self.num_iterations = num_iterations
self.grad_accumulation = grad_accumulation

self.callbacks_handler.register_callbacks(callbacks)
self.callbacks_handler.register_callbacks(callbacks, print_callbacks=self.print_callbacks)

self.model = self.model.to(self.device)
if self.criterion is not None:
Expand Down
18 changes: 12 additions & 6 deletions aitoolbox/torchtrain/train_loop/train_loop_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(self, model,
rm_subopt_local_models=False, num_best_checkpoints_kept=2,
iteration_save_freq=0,
collate_batch_pred_fn=append_predictions, pred_transform_fn=torch_cat_transf,
end_auto_eval=True, lazy_experiment_save=False,
end_auto_eval=True, lazy_experiment_save=False, print_callbacks=False,
gpu_mode='single', cuda_device_idx=None, use_amp=False):
"""TrainLoop with the automatic model check-pointing at the end of each epoch
Expand Down Expand Up @@ -60,6 +60,8 @@ def __init__(self, model,
lazy_experiment_save (bool): when in lazy mode experiment tracking components will create the experiment
folder only after some training results are available (possibly at the end of the first epoch) instead
of at the beginning of training.
print_callbacks (bool): at the start of training print the list of registered callbacks
which will be executed during the run of the train loop
gpu_mode (str): GPU training mode selection. TrainLoop supports different GPU training modes by
specifying one of the following:
Expand All @@ -77,7 +79,7 @@ def __init__(self, model,
"""
TrainLoop.__init__(self, model, train_loader, validation_loader, test_loader, optimizer, criterion,
collate_batch_pred_fn, pred_transform_fn,
end_auto_eval, lazy_experiment_save,
end_auto_eval, lazy_experiment_save, print_callbacks,
gpu_mode, cuda_device_idx, use_amp)
self.project_name = project_name
self.experiment_name = experiment_name
Expand Down Expand Up @@ -129,7 +131,7 @@ def __init__(self, model,
hyperparams, val_result_package=None, test_result_package=None,
cloud_save_mode='s3', bucket_name='model-result', cloud_dir_prefix='', source_dirs=(),
collate_batch_pred_fn=append_predictions, pred_transform_fn=torch_cat_transf,
end_auto_eval=True, lazy_experiment_save=False,
end_auto_eval=True, lazy_experiment_save=False, print_callbacks=False,
gpu_mode='single', cuda_device_idx=None, use_amp=False):
"""TrainLoop with the model performance evaluation and final model saving at the end of the training process
Expand Down Expand Up @@ -169,6 +171,8 @@ def __init__(self, model,
lazy_experiment_save (bool): when in lazy mode experiment tracking components will create the experiment
folder only after some training results are available (possibly at the end of the first epoch) instead
of at the beginning of training.
print_callbacks (bool): at the start of training print the list of registered callbacks
which will be executed during the run of the train loop
gpu_mode (str): GPU training mode selection. TrainLoop supports different GPU training modes by
specifying one of the following:
Expand All @@ -186,7 +190,7 @@ def __init__(self, model,
"""
TrainLoop.__init__(self, model, train_loader, validation_loader, test_loader, optimizer, criterion,
collate_batch_pred_fn, pred_transform_fn,
end_auto_eval, lazy_experiment_save,
end_auto_eval, lazy_experiment_save, print_callbacks,
gpu_mode, cuda_device_idx, use_amp)
self.project_name = project_name
self.experiment_name = experiment_name
Expand Down Expand Up @@ -242,7 +246,7 @@ def __init__(self, model,
rm_subopt_local_models=False, num_best_checkpoints_kept=2,
iteration_save_freq=0,
collate_batch_pred_fn=append_predictions, pred_transform_fn=torch_cat_transf,
end_auto_eval=True, lazy_experiment_save=False,
end_auto_eval=True, lazy_experiment_save=False, print_callbacks=False,
gpu_mode='single', cuda_device_idx=None, use_amp=False):
"""TrainLoop both saving model check-pointing at the end of each epoch and model performance reporting
and model saving at the end of the training process
Expand Down Expand Up @@ -290,6 +294,8 @@ def __init__(self, model,
lazy_experiment_save (bool): when in lazy mode experiment tracking components will create the experiment
folder only after some training results are available (possibly at the end of the first epoch) instead
of at the beginning of training.
print_callbacks (bool): at the start of training print the list of registered callbacks
which will be executed during the run of the train loop
gpu_mode (str): GPU training mode selection. TrainLoop supports different GPU training modes by
specifying one of the following:
Expand All @@ -316,7 +322,7 @@ def __init__(self, model,
hyperparams, val_result_package, test_result_package,
cloud_save_mode, bucket_name, cloud_dir_prefix, source_dirs,
collate_batch_pred_fn, pred_transform_fn,
end_auto_eval, lazy_experiment_save,
end_auto_eval, lazy_experiment_save, print_callbacks,
gpu_mode, cuda_device_idx, use_amp)
self.rm_subopt_local_models = rm_subopt_local_models
self.iteration_save_freq = iteration_save_freq
Expand Down
Binary file modified dist/aitoolbox-1.6.1-py3-none-any.whl
Binary file not shown.
Binary file modified dist/aitoolbox-1.6.1.tar.gz
Binary file not shown.

0 comments on commit 6e10ad7

Please sign in to comment.