Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions nowcasting_dataset/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,5 @@
"spatial_and_temporal_locations_of_each_example.csv"
)
SPATIAL_AND_TEMPORAL_LOCATIONS_COLUMN_NAMES = ("t0_datetime_UTC", "x_center_OSGB", "y_center_OSGB")

LOG_LEVELS = ("DEBUG", "INFO", "WARNING", "ERROR")
37 changes: 36 additions & 1 deletion nowcasting_dataset/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,39 @@ def save_yaml_configuration(self):
"""Save configuration to the 'output_data' location"""
config.save_yaml_configuration(configuration=self.config)

# TODO: Issue #322: Write test for Manager.configure_loggers()
def configure_loggers(
    self,
    log_level: str,
    names_of_selected_data_sources: Optional[list[str]] = ALL_DATA_SOURCE_NAMES,
) -> None:
    """Configure logging for a dataset-preparation run.

    The combined ``nowcasting_dataset`` log is echoed to stdout and appended
    to ``<output_data.filepath>/combined.log``.  Each selected DataSource
    additionally gets its own file-only log at
    ``<output_data.filepath>/<data_source>.log``.

    Args:
        log_level: Logging level name, e.g. ``"DEBUG"``.
        names_of_selected_data_sources: DataSources to create per-source log
            files for.  Defaults to every known DataSource.
    """
    output_path = self.config.output_data.filepath

    # Root package logger: stream to the console and append to combined.log.
    nd_utils.configure_logger(
        log_level=log_level,
        logger_name="nowcasting_dataset",
        handlers=[
            logging.StreamHandler(),
            logging.FileHandler(output_path / "combined.log", mode="a"),
        ],
    )

    # One file-only logger per selected DataSource.
    for source_name in names_of_selected_data_sources:
        nd_utils.configure_logger(
            log_level=log_level,
            logger_name=f"nowcasting_dataset.data_sources.{source_name}",
            handlers=[logging.FileHandler(output_path / f"{source_name}.log", mode="a")],
        )

def initialise_data_sources(
self, names_of_selected_data_sources: Optional[list[str]] = ALL_DATA_SOURCE_NAMES
) -> None:
Expand Down Expand Up @@ -414,5 +447,7 @@ def create_batches(self, overwrite_batches: bool) -> None:
# the main process, and to wait for the worker to finish.
exception = future.exception()
if exception is not None:
logger.exception(f"Worker process {data_source_name} raised exception!")
logger.exception(
f"Worker process {data_source_name} raised exception!\n{exception}"
)
raise exception
26 changes: 25 additions & 1 deletion nowcasting_dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import nowcasting_dataset
import nowcasting_dataset.filesystem.utils as nd_fs_utils
from nowcasting_dataset.config import load, model
from nowcasting_dataset.consts import Array
from nowcasting_dataset.consts import LOG_LEVELS, Array

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -155,3 +155,27 @@ def inner_func(*args, **kwargs):
return func(*args, **kwargs)

return inner_func


def configure_logger(log_level: str, logger_name: str, handlers: list[logging.Handler]) -> None:
    """Attach `handlers` to the logger called `logger_name` and set its level.

    Every handler is given the same level and a shared formatter that records
    timestamp, level, process ID, message, and source location.

    Args:
        log_level: Logging level name, one of LOG_LEVELS (e.g. 'DEBUG').
        logger_name: Name of the logger to configure.
        handlers: A list of logging.Handler objects to attach.

    Raises:
        ValueError: If `log_level` is not one of LOG_LEVELS.
    """
    # Fix: `handlers` was declared as `handlers=list[logging.Handler]`, which made
    # the *type* the default value (iterating it raised TypeError); it is now a
    # proper annotation.  Validate with an explicit raise rather than `assert`,
    # which is stripped when Python runs with -O.
    if log_level not in LOG_LEVELS:
        raise ValueError(f"log_level must be one of {LOG_LEVELS}, not {log_level!r}")
    level = getattr(logging, log_level)  # Convert the level name to its int value.

    formatter = logging.Formatter(
        "%(asctime)s %(levelname)s processID=%(process)d %(message)s | %(pathname)s#L%(lineno)d"
    )

    local_logger = logging.getLogger(logger_name)
    local_logger.setLevel(level)

    for handler in handlers:
        handler.setLevel(level)
        handler.setFormatter(formatter)
        local_logger.addHandler(handler)
17 changes: 10 additions & 7 deletions scripts/prepare_ml_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,11 @@

import nowcasting_dataset
from nowcasting_dataset import utils
from nowcasting_dataset.consts import LOG_LEVELS
from nowcasting_dataset.data_sources import ALL_DATA_SOURCE_NAMES
from nowcasting_dataset.manager import Manager

# Set up logging.
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s at %(pathname)s#L%(lineno)d")
logging.getLogger("nowcasting_dataset.data_source").setLevel(logging.WARNING)

logger = logging.getLogger("nowcasting_dataset")
logger.setLevel(logging.DEBUG)
logger = logging.getLogger(__name__)

default_config_filename = Pathy(nowcasting_dataset.__file__).parent / "config" / "on_premises.yaml"

Expand Down Expand Up @@ -56,11 +52,18 @@
" existing batches."
),
)
@click.option(
"--log_level",
default="DEBUG",
type=click.Choice(LOG_LEVELS),
help=("The log level represented as a string. Defaults to DEBUG."),
)
@utils.arg_logger
def main(config_filename: str, data_source: list[str], overwrite_batches: bool):
def main(config_filename: str, data_source: list[str], overwrite_batches: bool, log_level=str):
"""Generate pre-prepared batches of data."""
manager = Manager()
manager.load_yaml_configuration(config_filename)
manager.configure_loggers(log_level=log_level, names_of_selected_data_sources=data_source)
manager.initialise_data_sources(names_of_selected_data_sources=data_source)
# TODO: Issue 323: maybe don't allow
# create_files_specifying_spatial_and_temporal_locations_of_each_example to be run if a subset
Expand Down
2 changes: 2 additions & 0 deletions tests/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from nowcasting_dataset.data_sources.sun.sun_data_source import SunDataSource
from nowcasting_dataset.manager import Manager

# TODO: Issue #322: Write test for Manager.configure_loggers()


def test_sample_spatial_and_temporal_locations_for_examples(): # noqa: D103
local_path = Path(nowcasting_dataset.__file__).parent.parent
Expand Down