diff --git a/nowcasting_dataset/consts.py b/nowcasting_dataset/consts.py
index be05264c..0cc969fc 100644
--- a/nowcasting_dataset/consts.py
+++ b/nowcasting_dataset/consts.py
@@ -129,3 +129,5 @@
     "spatial_and_temporal_locations_of_each_example.csv"
 )
 SPATIAL_AND_TEMPORAL_LOCATIONS_COLUMN_NAMES = ("t0_datetime_UTC", "x_center_OSGB", "y_center_OSGB")
+
+LOG_LEVELS = ("DEBUG", "INFO", "WARNING", "ERROR")
diff --git a/nowcasting_dataset/manager.py b/nowcasting_dataset/manager.py
index 5a180fed..b68e80b1 100644
--- a/nowcasting_dataset/manager.py
+++ b/nowcasting_dataset/manager.py
@@ -56,6 +56,39 @@ def save_yaml_configuration(self):
         """Save configuration to the 'output_data' location"""
         config.save_yaml_configuration(configuration=self.config)
 
+    # TODO: Issue #322: Write test for Manager.configure_loggers()
+    def configure_loggers(
+        self,
+        log_level: str,
+        names_of_selected_data_sources: Optional[list[str]] = ALL_DATA_SOURCE_NAMES,
+    ) -> None:
+        """Configure loggers.
+
+        Print combined log to stdout.
+        Save combined log to self.config.output_data.filepath / combined.log
+        Save individual logs for each DataSource in
+        self.config.output_data.filepath / <data_source_name>.log
+        """
+        # Configure combined logger.
+        combined_log_filename = self.config.output_data.filepath / "combined.log"
+        nd_utils.configure_logger(
+            log_level=log_level,
+            logger_name="nowcasting_dataset",
+            handlers=[
+                logging.StreamHandler(),
+                logging.FileHandler(combined_log_filename, mode="a"),
+            ],
+        )
+
+        # Configure loggers for each DataSource.
+        for data_source_name in names_of_selected_data_sources:
+            log_filename = self.config.output_data.filepath / f"{data_source_name}.log"
+            nd_utils.configure_logger(
+                log_level=log_level,
+                logger_name=f"nowcasting_dataset.data_sources.{data_source_name}",
+                handlers=[logging.FileHandler(log_filename, mode="a")],
+            )
+
     def initialise_data_sources(
         self, names_of_selected_data_sources: Optional[list[str]] = ALL_DATA_SOURCE_NAMES
     ) -> None:
@@ -414,5 +447,7 @@ def create_batches(self, overwrite_batches: bool) -> None:
                 # the main process, and to wait for the worker to finish.
                 exception = future.exception()
                 if exception is not None:
-                    logger.exception(f"Worker process {data_source_name} raised exception!")
+                    logger.exception(
+                        f"Worker process {data_source_name} raised exception!\n{exception}"
+                    )
                     raise exception
diff --git a/nowcasting_dataset/utils.py b/nowcasting_dataset/utils.py
index e04f8783..b18f5123 100644
--- a/nowcasting_dataset/utils.py
+++ b/nowcasting_dataset/utils.py
@@ -14,7 +14,7 @@
 import nowcasting_dataset
 import nowcasting_dataset.filesystem.utils as nd_fs_utils
 from nowcasting_dataset.config import load, model
-from nowcasting_dataset.consts import Array
+from nowcasting_dataset.consts import LOG_LEVELS, Array
 
 logger = logging.getLogger(__name__)
 
@@ -155,3 +155,27 @@ def inner_func(*args, **kwargs):
         return func(*args, **kwargs)
 
     return inner_func
+
+
+def configure_logger(log_level: str, logger_name: str, handlers: list[logging.Handler]) -> None:
+    """Configure logger.
+
+    Args:
+        log_level: String representing logging level, e.g. 'DEBUG'.
+        logger_name: String.
+        handlers: A list of logging.Handler objects.
+    """
+    assert log_level in LOG_LEVELS
+    log_level = getattr(logging, log_level)  # Convert string to int.
+
+    formatter = logging.Formatter(
+        "%(asctime)s %(levelname)s processID=%(process)d %(message)s | %(pathname)s#L%(lineno)d"
+    )
+
+    local_logger = logging.getLogger(logger_name)
+    local_logger.setLevel(log_level)
+
+    for handler in handlers:
+        handler.setLevel(log_level)
+        handler.setFormatter(formatter)
+        local_logger.addHandler(handler)
diff --git a/scripts/prepare_ml_data.py b/scripts/prepare_ml_data.py
index 58ae6271..7c60bfe8 100755
--- a/scripts/prepare_ml_data.py
+++ b/scripts/prepare_ml_data.py
@@ -11,15 +11,11 @@
 
 import nowcasting_dataset
 from nowcasting_dataset import utils
+from nowcasting_dataset.consts import LOG_LEVELS
 from nowcasting_dataset.data_sources import ALL_DATA_SOURCE_NAMES
 from nowcasting_dataset.manager import Manager
 
-# Set up logging.
-logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s at %(pathname)s#L%(lineno)d")
-logging.getLogger("nowcasting_dataset.data_source").setLevel(logging.WARNING)
-
-logger = logging.getLogger("nowcasting_dataset")
-logger.setLevel(logging.DEBUG)
+logger = logging.getLogger(__name__)
 
 default_config_filename = Pathy(nowcasting_dataset.__file__).parent / "config" / "on_premises.yaml"
 
@@ -56,11 +52,18 @@
     " existing batches."
     ),
 )
+@click.option(
+    "--log_level",
+    default="DEBUG",
+    type=click.Choice(LOG_LEVELS),
+    help=("The log level represented as a string. Defaults to DEBUG."),
+)
 @utils.arg_logger
-def main(config_filename: str, data_source: list[str], overwrite_batches: bool):
+def main(config_filename: str, data_source: list[str], overwrite_batches: bool, log_level: str):
     """Generate pre-prepared batches of data."""
     manager = Manager()
     manager.load_yaml_configuration(config_filename)
+    manager.configure_loggers(log_level=log_level, names_of_selected_data_sources=data_source)
     manager.initialise_data_sources(names_of_selected_data_sources=data_source)
     # TODO: Issue 323: maybe don't allow
     # create_files_specifying_spatial_and_temporal_locations_of_each_example to be run if a subset
diff --git a/tests/test_manager.py b/tests/test_manager.py
index 836b5cc2..bfa36824 100644
--- a/tests/test_manager.py
+++ b/tests/test_manager.py
@@ -13,6 +13,8 @@
 from nowcasting_dataset.data_sources.sun.sun_data_source import SunDataSource
 from nowcasting_dataset.manager import Manager
 
+# TODO: Issue #322: Write test for Manager.configure_loggers()
+
 
 def test_sample_spatial_and_temporal_locations_for_examples():  # noqa: D103
     local_path = Path(nowcasting_dataset.__file__).parent.parent