From 7382f440aa54027ac6fb9556b7147532d4f0e9c4 Mon Sep 17 00:00:00 2001 From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Thu, 18 Jan 2024 13:05:22 +0100 Subject: [PATCH 1/4] Add more logging output to add_id component. --- src/metadata/add_id/script.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/metadata/add_id/script.py b/src/metadata/add_id/script.py index b92e89d8d96..44a4548fc8c 100644 --- a/src/metadata/add_id/script.py +++ b/src/metadata/add_id/script.py @@ -37,9 +37,12 @@ def make_observation_keys_unique(sample_id: str, sample: MuData) -> None: (unique for a sample) to each observation key, the observation key is made unique across all samples as well. """ - logger.info('Making observation keys unique across all samples.') + logger.info('Making observation keys unique across all ' + 'samples by appending prefix %s to the observation names.', + sample_id) sample.obs.index = f"{sample_id}_" + sample.obs.index make_observation_keys_unique_per_mod(sample_id, sample) + logger.info("Done making observation keys unique.") def make_observation_keys_unique_per_mod(sample_id: str, sample: MuData) -> None: @@ -47,18 +50,28 @@ def make_observation_keys_unique_per_mod(sample_id: str, sample: MuData) -> None Updating MuData.obs_names is not allowed (it is read-only). So the observation keys for each modality has to be updated manually. """ - for mod in sample.mod.values(): + for mod_name, mod in sample.mod.items(): + logger.info("Processing modality '%s'", mod_name) mod.obs_names = f"{sample_id}_" + mod.obs_names def main(): + logger.info("Reading input file '%s'.", par["input"]) input_data = read_h5mu(par["input"]) + logger.info("Adding column '%s' to global .obs dataframe, populated with ID '%s'", + par["obs_output"], par["input_id"]) input_data.obs[par["obs_output"]] = par["input_id"] - for mod_data in input_data.mod.values(): + logger.info("Done adding column to global .obs") + for mod_name, mod_data in input_data.mod.items(): + logger.info("Adding column '%s' to .obs dataframe for modality '%s', " + "populated with ID '%s'", par["obs_output"], mod_name, par["input_id"]) mod_data.obs[par["obs_output"]] = par["input_id"] + logger.info("Done adding per-modality columns.") if par["make_observation_keys_unique"]: make_observation_keys_unique(par["input_id"], input_data) - logger.info("Writing out data to '%s'.", par["output"]) + logger.info("Writing out data to '%s' with compression '%s'.", + par["output"], par["output_compression"]) input_data.write_h5mu(par["output"], compression=par["output_compression"]) + logger.info("Finished") if __name__ == '__main__': main() \ No newline at end of file From f1b7801b2399875137d2fb4adac69e25b2f1efe1 Mon Sep 17 00:00:00 2001 From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Thu, 18 Jan 2024 13:06:31 +0100 Subject: [PATCH 2/4] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3463f434048..508140aedca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ ## MINOR CHANGES +* `metadata/add_id`: add more runtime logging. + * `cluster/leiden`: Bump python to 3.11 and leidenalg to 0.10.0 (PR #645). * `mapping/htseq_count_to_h5mu` and `multi_star`: update polars and gtfparse (PR #642). From ac123c488d185e7895e320bac31c6db0d33277fc Mon Sep 17 00:00:00 2001 From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Thu, 18 Jan 2024 13:08:39 +0100 Subject: [PATCH 3/4] Added some quotes --- src/metadata/add_id/script.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/metadata/add_id/script.py b/src/metadata/add_id/script.py index 44a4548fc8c..af8581065e4 100644 --- a/src/metadata/add_id/script.py +++ b/src/metadata/add_id/script.py @@ -37,8 +37,8 @@ def make_observation_keys_unique(sample_id: str, sample: MuData) -> None: (unique for a sample) to each observation key, the observation key is made unique across all samples as well. """ - logger.info('Making observation keys unique across all ' - 'samples by appending prefix %s to the observation names.', + logger.info("Making observation keys unique across all " + "samples by appending prefix '%s' to the observation names.", sample_id) sample.obs.index = f"{sample_id}_" + sample.obs.index make_observation_keys_unique_per_mod(sample_id, sample) From d72278401c2c66cb74c4c1aae9a908d39ffce65c Mon Sep 17 00:00:00 2001 From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Thu, 18 Jan 2024 13:10:04 +0100 Subject: [PATCH 4/4] Add PR number to CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 508140aedca..061393f3be9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ ## MINOR CHANGES -* `metadata/add_id`: add more runtime logging. +* `metadata/add_id`: add more runtime logging (PR #663). * `cluster/leiden`: Bump python to 3.11 and leidenalg to 0.10.0 (PR #645).