Skip to content

Commit

Permalink
Merge pull request #735 from mv1388/show_registered_callbacks
Browse files Browse the repository at this point in the history
Print the list of registered callbacks at the start of the training
  • Loading branch information
mv1388 committed Aug 13, 2022
2 parents 7ad2e70 + ee846d6 commit 6e10ad7
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(self, train_loop_obj):
self.cbs_on_after_batch_prediction
]

def register_callbacks(self, callbacks, cache_callbacks=False):
def register_callbacks(self, callbacks, cache_callbacks=False, print_callbacks=False):
"""Register TrainLoop object reference inside the listed callbacks when the TrainLoop is created
Normally, this is called from inside the train loop by the TrainLoop itself. Basically train loop "registers"
Expand All @@ -56,6 +56,8 @@ def register_callbacks(self, callbacks, cache_callbacks=False):
cache_callbacks (bool): should the provided callbacks be cached and not yet registered. First subsequent
time this method is called without ``cache_callbacks`` enabled all the previously cached callbacks
are added and also registered with the current list of callbacks.
print_callbacks (bool): after registering the provided callbacks also print the list of registered callbacks
which will be executed during the run of the train loop
Returns:
None
Expand Down Expand Up @@ -87,6 +89,9 @@ def register_callbacks(self, callbacks, cache_callbacks=False):
# time this prevents their duplication in the execution-position-split self.registered_cbs.
self.split_on_execution_position(callbacks, register_train_loop=False)

if print_callbacks:
self.print_registered_callback_names()

def should_enable_callback(self, callback):
"""Determine if callback should be enabled and executed to be in accordance with the GPU device setting
Expand Down Expand Up @@ -250,6 +255,8 @@ def print_callback_info(callback_list):
for callback in callback_list])

def print_registered_callback_names(self):
    """Print the names of all callbacks currently registered with this handler.

    When running in DDP (distributed) training mode, first print a header
    identifying the device/process producing the output, so that the
    interleaved per-process callback listings can be told apart in the logs.

    Returns:
        None
    """
    if self.train_loop_obj.ddp_training_mode:
        # Tag the output with this process's CUDA device so DDP log lines are attributable.
        print(f'*** On device {self.train_loop_obj.device.index} ({self.train_loop_obj.device}) ***')
    # NOTE(review): relies on this handler's __str__ (defined elsewhere in the
    # class, not visible here) to format the registered callback names.
    print(self)

def __len__(self):
Expand Down
7 changes: 5 additions & 2 deletions aitoolbox/torchtrain/train_loop/train_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self, model,
train_loader, validation_loader, test_loader,
optimizer, criterion,
collate_batch_pred_fn=append_predictions, pred_transform_fn=torch_cat_transf,
end_auto_eval=True, lazy_experiment_save=False,
end_auto_eval=True, lazy_experiment_save=False, print_callbacks=False,
gpu_mode='single', cuda_device_idx=None, use_amp=False):
"""Core PyTorch TrainLoop supporting the model training and target prediction
Expand All @@ -58,6 +58,8 @@ def __init__(self, model,
lazy_experiment_save (bool): when in lazy mode experiment tracking components will create the experiment
folder only after some training results are available (possibly at the end of the first epoch) instead
of at the beginning of training.
print_callbacks (bool): at the start of training print the list of registered callbacks
which will be executed during the run of the train loop
gpu_mode (str): GPU training mode selection. TrainLoop supports different GPU training modes by
specifying one of the following:
Expand Down Expand Up @@ -92,6 +94,7 @@ def __init__(self, model,
self.pred_transform_fn = pred_transform_fn
self.end_auto_eval = end_auto_eval
self.lazy_experiment_save = lazy_experiment_save
self.print_callbacks = print_callbacks

self.num_optimizers = 1 if not isinstance(self.optimizer, MultiOptimizer) else len(self.optimizer)

Expand Down Expand Up @@ -210,7 +213,7 @@ def _train(self, num_epochs, num_iterations, callbacks=None, grad_accumulation=1
self.num_iterations = num_iterations
self.grad_accumulation = grad_accumulation

self.callbacks_handler.register_callbacks(callbacks)
self.callbacks_handler.register_callbacks(callbacks, print_callbacks=self.print_callbacks)

self.model = self.model.to(self.device)
if self.criterion is not None:
Expand Down
18 changes: 12 additions & 6 deletions aitoolbox/torchtrain/train_loop/train_loop_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(self, model,
rm_subopt_local_models=False, num_best_checkpoints_kept=2,
iteration_save_freq=0,
collate_batch_pred_fn=append_predictions, pred_transform_fn=torch_cat_transf,
end_auto_eval=True, lazy_experiment_save=False,
end_auto_eval=True, lazy_experiment_save=False, print_callbacks=False,
gpu_mode='single', cuda_device_idx=None, use_amp=False):
"""TrainLoop with the automatic model check-pointing at the end of each epoch
Expand Down Expand Up @@ -60,6 +60,8 @@ def __init__(self, model,
lazy_experiment_save (bool): when in lazy mode experiment tracking components will create the experiment
folder only after some training results are available (possibly at the end of the first epoch) instead
of at the beginning of training.
print_callbacks (bool): at the start of training print the list of registered callbacks
which will be executed during the run of the train loop
gpu_mode (str): GPU training mode selection. TrainLoop supports different GPU training modes by
specifying one of the following:
Expand All @@ -77,7 +79,7 @@ def __init__(self, model,
"""
TrainLoop.__init__(self, model, train_loader, validation_loader, test_loader, optimizer, criterion,
collate_batch_pred_fn, pred_transform_fn,
end_auto_eval, lazy_experiment_save,
end_auto_eval, lazy_experiment_save, print_callbacks,
gpu_mode, cuda_device_idx, use_amp)
self.project_name = project_name
self.experiment_name = experiment_name
Expand Down Expand Up @@ -129,7 +131,7 @@ def __init__(self, model,
hyperparams, val_result_package=None, test_result_package=None,
cloud_save_mode='s3', bucket_name='model-result', cloud_dir_prefix='', source_dirs=(),
collate_batch_pred_fn=append_predictions, pred_transform_fn=torch_cat_transf,
end_auto_eval=True, lazy_experiment_save=False,
end_auto_eval=True, lazy_experiment_save=False, print_callbacks=False,
gpu_mode='single', cuda_device_idx=None, use_amp=False):
"""TrainLoop with the model performance evaluation and final model saving at the end of the training process
Expand Down Expand Up @@ -169,6 +171,8 @@ def __init__(self, model,
lazy_experiment_save (bool): when in lazy mode experiment tracking components will create the experiment
folder only after some training results are available (possibly at the end of the first epoch) instead
of at the beginning of training.
print_callbacks (bool): at the start of training print the list of registered callbacks
which will be executed during the run of the train loop
gpu_mode (str): GPU training mode selection. TrainLoop supports different GPU training modes by
specifying one of the following:
Expand All @@ -186,7 +190,7 @@ def __init__(self, model,
"""
TrainLoop.__init__(self, model, train_loader, validation_loader, test_loader, optimizer, criterion,
collate_batch_pred_fn, pred_transform_fn,
end_auto_eval, lazy_experiment_save,
end_auto_eval, lazy_experiment_save, print_callbacks,
gpu_mode, cuda_device_idx, use_amp)
self.project_name = project_name
self.experiment_name = experiment_name
Expand Down Expand Up @@ -242,7 +246,7 @@ def __init__(self, model,
rm_subopt_local_models=False, num_best_checkpoints_kept=2,
iteration_save_freq=0,
collate_batch_pred_fn=append_predictions, pred_transform_fn=torch_cat_transf,
end_auto_eval=True, lazy_experiment_save=False,
end_auto_eval=True, lazy_experiment_save=False, print_callbacks=False,
gpu_mode='single', cuda_device_idx=None, use_amp=False):
"""TrainLoop both saving model check-pointing at the end of each epoch and model performance reporting
and model saving at the end of the training process
Expand Down Expand Up @@ -290,6 +294,8 @@ def __init__(self, model,
lazy_experiment_save (bool): when in lazy mode experiment tracking components will create the experiment
folder only after some training results are available (possibly at the end of the first epoch) instead
of at the beginning of training.
print_callbacks (bool): at the start of training print the list of registered callbacks
which will be executed during the run of the train loop
gpu_mode (str): GPU training mode selection. TrainLoop supports different GPU training modes by
specifying one of the following:
Expand All @@ -316,7 +322,7 @@ def __init__(self, model,
hyperparams, val_result_package, test_result_package,
cloud_save_mode, bucket_name, cloud_dir_prefix, source_dirs,
collate_batch_pred_fn, pred_transform_fn,
end_auto_eval, lazy_experiment_save,
end_auto_eval, lazy_experiment_save, print_callbacks,
gpu_mode, cuda_device_idx, use_amp)
self.rm_subopt_local_models = rm_subopt_local_models
self.iteration_save_freq = iteration_save_freq
Expand Down
Binary file modified dist/aitoolbox-1.6.1-py3-none-any.whl
Binary file not shown.
Binary file modified dist/aitoolbox-1.6.1.tar.gz
Binary file not shown.

0 comments on commit 6e10ad7

Please sign in to comment.