Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

STYLE: Ensure new line at the end of JSON files #410

Merged
merged 2 commits into from
Jun 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
sys.path.insert(0, str(health_ml_root))
sys.path.insert(0, str(health_azure_root))

from health_ml.utils.common_utils import df_to_json # noqa: E402
from health_azure import aggregate_hyperdrive_metrics # NOQA: E402
from health_azure.utils import get_aml_run_from_run_id, get_metrics_for_childless_run # NOQA: E402

Expand Down Expand Up @@ -79,7 +80,7 @@ def upload_regression_metrics_file_to_run(metrics_df: pd.DataFrame, run: Run) ->
regression_results_dir.mkdir(exist_ok=True)
metrics_json_output = regression_results_dir / "metrics.json"

metrics_df.to_json(metrics_json_output)
df_to_json(metrics_df, metrics_json_output)
print("Uploading metrics file to AML Run")
run.upload_file("outputs/regression_metrics.json", str(metrics_json_output))
metrics_json_output.unlink()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pandas as pd
from azureml.core import Experiment, Run, Workspace

from health_ml.utils.common_utils import df_to_json
from health_azure.utils import (aggregate_hyperdrive_metrics, download_file_if_necessary, get_aml_run_from_run_id,
get_tags_from_hyperdrive_run)
from histopathology.utils.output_utils import (AML_LEGACY_TEST_OUTPUTS_CSV, AML_TEST_OUTPUTS_CSV,
Expand Down Expand Up @@ -116,7 +117,7 @@ def collect_crossval_metrics(parent_run_id: str, download_dir: Path, aml_workspa
aml_workspace=aml_workspace)
metrics_json.parent.mkdir(parents=True, exist_ok=True)
print(f"Writing AML metrics file to {metrics_json}")
metrics_df.to_json(metrics_json)
df_to_json(metrics_df, metrics_json)
fepegar marked this conversation as resolved.
Show resolved Hide resolved
return metrics_df.sort_index(axis='columns')


Expand Down
12 changes: 8 additions & 4 deletions hi-ml/src/health_ml/run_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@
from health_ml.utils import fixed_paths
from health_ml.utils.checkpoint_handler import CheckpointHandler
from health_ml.utils.common_utils import (
EFFECTIVE_RANDOM_SEED_KEY_NAME, change_working_directory,
RUN_RECOVERY_ID_KEY, RUN_RECOVERY_FROM_ID_KEY_NAME)
EFFECTIVE_RANDOM_SEED_KEY_NAME,
change_working_directory,
RUN_RECOVERY_ID_KEY,
RUN_RECOVERY_FROM_ID_KEY_NAME,
df_to_json,
)
from health_ml.utils.lightning_loggers import StoringLogger
from health_ml.utils.regression_test_utils import REGRESSION_TEST_METRICS_FILENAME, compare_folders_and_run_outputs
from health_ml.utils.type_annotations import PathOrString
Expand Down Expand Up @@ -174,9 +178,9 @@ def run(self) -> None:
keep_metrics=regression_metrics)

if not df.empty:
metrics_filename = str(self.container.outputs_folder / REGRESSION_TEST_METRICS_FILENAME)
metrics_filename = self.container.outputs_folder / REGRESSION_TEST_METRICS_FILENAME
logging.info(f"Saving metrics to {metrics_filename}")
df.to_json(metrics_filename)
df_to_json(df, metrics_filename)

compare_folders_and_run_outputs(expected=self.container.regression_test_folder,
actual=self.container.outputs_folder,
Expand Down
14 changes: 14 additions & 0 deletions hi-ml/src/health_ml/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import torch
from torch.nn import Module
import pandas as pd
from health_azure import paths

from health_azure.utils import PathOrString, is_conda_file_with_pip_include
Expand Down Expand Up @@ -237,3 +238,16 @@ def is_long_path(path: PathOrString) -> bool:
:return: True if the length of the path is greater than MAX_PATH_LENGTH, else False
"""
return len(str(path)) > MAX_PATH_LENGTH


def df_to_json(df: pd.DataFrame, json_path: "PathOrString", add_newline: bool = True) -> None:
    """Save a data frame to a JSON file.

    The data frame is serialised with ``DataFrame.to_json`` using its default settings, and the
    result is written to ``json_path``, replacing any file that already exists there.

    :param df: Input data frame.
    :param json_path: Path to output JSON file, given either as a ``Path`` or as a string.
    :param add_newline: If ``True``, add newline at the end of the JSON file for POSIX compliance.
    """
    text = df.to_json()
    if add_newline:
        text += '\n'
    # Coerce to Path so that plain string paths are accepted as well, just as they were by
    # ``DataFrame.to_json(path)``. The encoding is pinned so the file does not depend on the locale.
    Path(json_path).write_text(text, encoding="utf-8")