microsoft · fepegar · Jun 7, 2022 · Jun 7, 2022 · Jun 7, 2022
diff --git a/hi-ml-histopathology/src/histopathology/scripts/aggregate_metrics_crossvalidation.py b/hi-ml-histopathology/src/histopathology/scripts/aggregate_metrics_crossvalidation.py
@@ -20,6 +20,7 @@
 sys.path.insert(0, str(health_ml_root))
 sys.path.insert(0, str(health_azure_root))
 
+from health_ml.utils.common_utils import df_to_json  # noqa: E402
 from health_azure import aggregate_hyperdrive_metrics  # NOQA: E402
 from health_azure.utils import get_aml_run_from_run_id, get_metrics_for_childless_run  # NOQA: E402
 
@@ -79,7 +80,7 @@ def upload_regression_metrics_file_to_run(metrics_df: pd.DataFrame, run: Run) ->
     regression_results_dir.mkdir(exist_ok=True)
     metrics_json_output = regression_results_dir / "metrics.json"
 
-    metrics_df.to_json(metrics_json_output)
+    df_to_json(metrics_df, metrics_json_output)
     print("Uploading metrics file to AML Run")
     run.upload_file("outputs/regression_metrics.json", str(metrics_json_output))
     metrics_json_output.unlink()

diff --git a/hi-ml-histopathology/src/histopathology/utils/report_utils.py b/hi-ml-histopathology/src/histopathology/utils/report_utils.py
@@ -11,6 +11,7 @@
 import pandas as pd
 from azureml.core import Experiment, Run, Workspace
 
+from health_ml.utils.common_utils import df_to_json
 from health_azure.utils import (aggregate_hyperdrive_metrics, download_file_if_necessary, get_aml_run_from_run_id,
                                 get_tags_from_hyperdrive_run)
 from histopathology.utils.output_utils import (AML_LEGACY_TEST_OUTPUTS_CSV, AML_TEST_OUTPUTS_CSV,
@@ -116,7 +117,7 @@ def collect_crossval_metrics(parent_run_id: str, download_dir: Path, aml_workspa
                                                   aml_workspace=aml_workspace)
         metrics_json.parent.mkdir(parents=True, exist_ok=True)
         print(f"Writing AML metrics file to {metrics_json}")
-        metrics_df.to_json(metrics_json)
+        df_to_json(metrics_df, metrics_json)
     return metrics_df.sort_index(axis='columns')
 
 

diff --git a/hi-ml/src/health_ml/run_ml.py b/hi-ml/src/health_ml/run_ml.py
@@ -22,8 +22,12 @@
 from health_ml.utils import fixed_paths
 from health_ml.utils.checkpoint_handler import CheckpointHandler
 from health_ml.utils.common_utils import (
-    EFFECTIVE_RANDOM_SEED_KEY_NAME, change_working_directory,
-    RUN_RECOVERY_ID_KEY, RUN_RECOVERY_FROM_ID_KEY_NAME)
+    EFFECTIVE_RANDOM_SEED_KEY_NAME,
+    change_working_directory,
+    RUN_RECOVERY_ID_KEY,
+    RUN_RECOVERY_FROM_ID_KEY_NAME,
+    df_to_json,
+)
 from health_ml.utils.lightning_loggers import StoringLogger
 from health_ml.utils.regression_test_utils import REGRESSION_TEST_METRICS_FILENAME, compare_folders_and_run_outputs
 from health_ml.utils.type_annotations import PathOrString
@@ -174,9 +178,9 @@ def run(self) -> None:
                             keep_metrics=regression_metrics)
 
                     if not df.empty:
-                        metrics_filename = str(self.container.outputs_folder / REGRESSION_TEST_METRICS_FILENAME)
+                        metrics_filename = self.container.outputs_folder / REGRESSION_TEST_METRICS_FILENAME
                         logging.info(f"Saving metrics to {metrics_filename}")
-                        df.to_json(metrics_filename)
+                        df_to_json(df, metrics_filename)
 
                 compare_folders_and_run_outputs(expected=self.container.regression_test_folder,
                                                 actual=self.container.outputs_folder,

diff --git a/hi-ml/src/health_ml/utils/common_utils.py b/hi-ml/src/health_ml/utils/common_utils.py
@@ -13,6 +13,7 @@
 
 import torch
 from torch.nn import Module
+import pandas as pd
 from health_azure import paths
 
 from health_azure.utils import PathOrString, is_conda_file_with_pip_include
@@ -237,3 +238,16 @@ def is_long_path(path: PathOrString) -> bool:
     :return: True if the length of the path is greater than MAX_PATH_LENGTH, else False
     """
     return len(str(path)) > MAX_PATH_LENGTH
+
+
+def df_to_json(df: pd.DataFrame, json_path: PathOrString, add_newline: bool = True) -> None:
+    """Save a data frame to a JSON file, serialized via ``DataFrame.to_json`` with its default arguments.
+
+    :param df: Input data frame.
+    :param json_path: Path to output JSON file, given either as a ``Path`` or as a string.
+    :param add_newline: If ``True``, add newline at the end of the JSON file for POSIX compliance.
+    """
+    text = df.to_json()
+    if add_newline:
+        text += '\n'
+    Path(json_path).write_text(text)  # Path() makes string paths work, as with DataFrame.to_json(path)