From 1e423a30d6bd793dbcd4f5147b621960f02cc08e Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Tue, 16 Nov 2021 17:25:55 +0000 Subject: [PATCH 1/4] Configure loggers --- nowcasting_dataset/consts.py | 2 ++ nowcasting_dataset/manager.py | 32 ++++++++++++++++++++++++++++++++ nowcasting_dataset/utils.py | 26 +++++++++++++++++++++++++- scripts/prepare_ml_data.py | 17 ++++++++++------- 4 files changed, 69 insertions(+), 8 deletions(-) diff --git a/nowcasting_dataset/consts.py b/nowcasting_dataset/consts.py index d3d65f8f..fa663484 100644 --- a/nowcasting_dataset/consts.py +++ b/nowcasting_dataset/consts.py @@ -107,3 +107,5 @@ "spatial_and_temporal_locations_of_each_example.csv" ) SPATIAL_AND_TEMPORAL_LOCATIONS_COLUMN_NAMES = ("t0_datetime_UTC", "x_center_OSGB", "y_center_OSGB") + +LOG_LEVELS = ("DEBUG", "INFO", "WARNING", "ERROR") diff --git a/nowcasting_dataset/manager.py b/nowcasting_dataset/manager.py index 5a180fed..15c03b58 100644 --- a/nowcasting_dataset/manager.py +++ b/nowcasting_dataset/manager.py @@ -56,6 +56,38 @@ def save_yaml_configuration(self): """Save configuration to the 'output_data' location""" config.save_yaml_configuration(configuration=self.config) + def configure_loggers( + self, + log_level: str, + names_of_selected_data_sources: Optional[list[str]] = ALL_DATA_SOURCE_NAMES, + ) -> None: + """Configure loggers. + + Print combined log to stdout. + Save combined log to self.config.output_data.filepath / combined.log + Save individual logs for each DataSource in + self.config.output_data.filepath / .log + """ + # Configure combined logger. + combined_log_filename = self.config.output_data.filepath / "combined.log" + nd_utils.configure_logging( + log_level=log_level, + logger_name=__name__, + handlers=[ + logging.StreamHandler(), + logging.FileHandler(combined_log_filename, mode="a"), + ], + ) + + # Configure loggers for each DataSource. + for data_source_name in names_of_selected_data_sources: + log_filename = self.config.output_data.filepath / f"{data_source_name}.log" + nd_utils.configure_logging( + log_level=log_level, + logger_name=f"{__name__}.data_sources.{data_source_name}", + handlers=[logging.FileHandler(log_filename, mode="a")], + ) + def initialise_data_sources( self, names_of_selected_data_sources: Optional[list[str]] = ALL_DATA_SOURCE_NAMES ) -> None: diff --git a/nowcasting_dataset/utils.py b/nowcasting_dataset/utils.py index e04f8783..b18f5123 100644 --- a/nowcasting_dataset/utils.py +++ b/nowcasting_dataset/utils.py @@ -14,7 +14,7 @@ import nowcasting_dataset import nowcasting_dataset.filesystem.utils as nd_fs_utils from nowcasting_dataset.config import load, model -from nowcasting_dataset.consts import Array +from nowcasting_dataset.consts import LOG_LEVELS, Array logger = logging.getLogger(__name__) @@ -155,3 +155,27 @@ def inner_func(*args, **kwargs): return func(*args, **kwargs) return inner_func + + +def configure_logger(log_level: str, logger_name: str, handlers=list[logging.Handler]) -> None: + """Configure logger. + + Args: + log_level: String representing logging level, e.g. 'DEBUG'. + logger_name: String. + handlers: A list of logging.Handler objects. + """ + assert log_level in LOG_LEVELS + log_level = getattr(logging, log_level) # Convert string to int. + + formatter = logging.Formatter( + "%(asctime)s %(levelname)s processID=%(process)d %(message)s | %(pathname)s#L%(lineno)d" + ) + + local_logger = logging.getLogger(logger_name) + local_logger.setLevel(log_level) + + for handler in handlers: + handler.setLevel(log_level) + handler.setFormatter(formatter) + local_logger.addHandler(handler) diff --git a/scripts/prepare_ml_data.py b/scripts/prepare_ml_data.py index 818052e8..527ba813 100755 --- a/scripts/prepare_ml_data.py +++ b/scripts/prepare_ml_data.py @@ -11,15 +11,11 @@ import nowcasting_dataset from nowcasting_dataset import utils +from nowcasting_dataset.consts import LOG_LEVELS from nowcasting_dataset.data_sources import ALL_DATA_SOURCE_NAMES from nowcasting_dataset.manager import Manager -# Set up logging. -logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s at %(pathname)s#L%(lineno)d") -logging.getLogger("nowcasting_dataset.data_source").setLevel(logging.WARNING) - -logger = logging.getLogger("nowcasting_dataset") -logger.setLevel(logging.DEBUG) +logger = logging.getLogger(__name__) default_config_filename = Pathy(nowcasting_dataset.__file__).parent / "config" / "on_premises.yaml" @@ -55,11 +51,18 @@ " existing batches." ), ) +@click.option( + "--log_level", + default=logging.DEBUG, + type=click.Choice(LOG_LEVELS), + help=("The log level represented as a string. Defaults to DEBUG."), +) @utils.arg_logger -def main(config_filename: str, data_source: list[str], overwrite_batches: bool): +def main(config_filename: str, data_source: list[str], overwrite_batches: bool, log_level=str): """Generate pre-prepared batches of data.""" manager = Manager() manager.load_yaml_configuration(config_filename) + manager.configure_loggers(log_level=log_level, names_of_selected_data_sources=data_source) manager.initialise_data_sources(names_of_selected_data_sources=data_source) # TODO: Issue 323: maybe don't allow # create_files_specifying_spatial_and_temporal_locations_of_each_example to be run if a subset From e96611af2f97579537c342a0e5c4755ba0b2920f Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Tue, 16 Nov 2021 17:57:58 +0000 Subject: [PATCH 2/4] fix bugs --- nowcasting_dataset/manager.py | 4 ++-- scripts/prepare_ml_data.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nowcasting_dataset/manager.py b/nowcasting_dataset/manager.py index 15c03b58..685a66d3 100644 --- a/nowcasting_dataset/manager.py +++ b/nowcasting_dataset/manager.py @@ -70,7 +70,7 @@ def configure_loggers( """ # Configure combined logger. combined_log_filename = self.config.output_data.filepath / "combined.log" - nd_utils.configure_logging( + nd_utils.configure_logger( log_level=log_level, logger_name=__name__, handlers=[ @@ -82,7 +82,7 @@ def configure_loggers( # Configure loggers for each DataSource. for data_source_name in names_of_selected_data_sources: log_filename = self.config.output_data.filepath / f"{data_source_name}.log" - nd_utils.configure_logging( + nd_utils.configure_logger( log_level=log_level, logger_name=f"{__name__}.data_sources.{data_source_name}", handlers=[logging.FileHandler(log_filename, mode="a")], diff --git a/scripts/prepare_ml_data.py b/scripts/prepare_ml_data.py index 943d50d2..7c60bfe8 100755 --- a/scripts/prepare_ml_data.py +++ b/scripts/prepare_ml_data.py @@ -54,7 +54,7 @@ ) @click.option( "--log_level", - default=logging.DEBUG, + default="DEBUG", type=click.Choice(LOG_LEVELS), help=("The log level represented as a string. Defaults to DEBUG."), ) From 39030c6d45ffef7072003453e6325dca518301f4 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Tue, 16 Nov 2021 18:59:56 +0000 Subject: [PATCH 3/4] Logging to separate files works! --- nowcasting_dataset/manager.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nowcasting_dataset/manager.py b/nowcasting_dataset/manager.py index 685a66d3..ed1e0c90 100644 --- a/nowcasting_dataset/manager.py +++ b/nowcasting_dataset/manager.py @@ -72,7 +72,7 @@ def configure_loggers( combined_log_filename = self.config.output_data.filepath / "combined.log" nd_utils.configure_logger( log_level=log_level, - logger_name=__name__, + logger_name="nowcasting_dataset", handlers=[ logging.StreamHandler(), logging.FileHandler(combined_log_filename, mode="a"), @@ -84,7 +84,7 @@ def configure_loggers( log_filename = self.config.output_data.filepath / f"{data_source_name}.log" nd_utils.configure_logger( log_level=log_level, - logger_name=f"{__name__}.data_sources.{data_source_name}", + logger_name=f"nowcasting_dataset.data_sources.{data_source_name}", handlers=[logging.FileHandler(log_filename, mode="a")], ) @@ -446,5 +446,7 @@ def create_batches(self, overwrite_batches: bool) -> None: # the main process, and to wait for the worker to finish. exception = future.exception() if exception is not None: - logger.exception(f"Worker process {data_source_name} raised exception!") + logger.exception( + f"Worker process {data_source_name} raised exception!\n{exception}" + ) raise exception From 3040d959190569d5df1e445b65a1f5ea3bf1d140 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Tue, 16 Nov 2021 21:26:54 +0000 Subject: [PATCH 4/4] Add TODO to write test for Manager.configure_loggers() --- nowcasting_dataset/manager.py | 1 + tests/test_manager.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/nowcasting_dataset/manager.py b/nowcasting_dataset/manager.py index ed1e0c90..b68e80b1 100644 --- a/nowcasting_dataset/manager.py +++ b/nowcasting_dataset/manager.py @@ -56,6 +56,7 @@ def save_yaml_configuration(self): """Save configuration to the 'output_data' location""" config.save_yaml_configuration(configuration=self.config) + # TODO: Issue #322: Write test for Manager.configure_loggers() def configure_loggers( self, log_level: str, diff --git a/tests/test_manager.py b/tests/test_manager.py index 836b5cc2..bfa36824 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -13,6 +13,8 @@ from nowcasting_dataset.data_sources.sun.sun_data_source import SunDataSource from nowcasting_dataset.manager import Manager +# TODO: Issue #322: Write test for Manager.configure_loggers() + def test_sample_spatial_and_temporal_locations_for_examples(): # noqa: D103 local_path = Path(nowcasting_dataset.__file__).parent.parent