diff --git a/CHANGELOG.md b/CHANGELOG.md index 3463f434048..061393f3be9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ ## MINOR CHANGES +* `metadata/add_id`: add more runtime logging (PR #663). + * `cluster/leiden`: Bump python to 3.11 and leidenalg to 0.10.0 (PR #645). * `mapping/htseq_count_to_h5mu` and `multi_star`: update polars and gtfparse (PR #642). diff --git a/src/metadata/add_id/script.py b/src/metadata/add_id/script.py index b92e89d8d96..af8581065e4 100644 --- a/src/metadata/add_id/script.py +++ b/src/metadata/add_id/script.py @@ -37,9 +37,12 @@ def make_observation_keys_unique(sample_id: str, sample: MuData) -> None: (unique for a sample) to each observation key, the observation key is made unique across all samples as well. """ - logger.info('Making observation keys unique across all samples.') + logger.info("Making observation keys unique across all " + "samples by appending prefix '%s' to the observation names.", + sample_id) sample.obs.index = f"{sample_id}_" + sample.obs.index make_observation_keys_unique_per_mod(sample_id, sample) + logger.info("Done making observation keys unique.") def make_observation_keys_unique_per_mod(sample_id: str, sample: MuData) -> None: @@ -47,18 +50,28 @@ def make_observation_keys_unique_per_mod(sample_id: str, sample: MuData) -> None Updating MuData.obs_names is not allowed (it is read-only). So the observation keys for each modality has to be updated manually. """ - for mod in sample.mod.values(): + for mod_name, mod in sample.mod.items(): + logger.info("Processing modality '%s'", mod_name) mod.obs_names = f"{sample_id}_" + mod.obs_names def main(): + logger.info("Reading input file '%s'.", par["input"]) input_data = read_h5mu(par["input"]) + logger.info("Adding column '%s' to global .obs dataframe, populated with ID '%s'", + par["obs_output"], par["input_id"]) input_data.obs[par["obs_output"]] = par["input_id"] - for mod_data in input_data.mod.values(): + logger.info("Done adding column to global .obs") + for mod_name, mod_data in input_data.mod.items(): + logger.info("Adding column '%s' to .obs dataframe for modality '%s', " + "populated with ID '%s'", par["obs_output"], mod_name, par["input_id"]) mod_data.obs[par["obs_output"]] = par["input_id"] + logger.info("Done adding per-modality columns.") if par["make_observation_keys_unique"]: make_observation_keys_unique(par["input_id"], input_data) - logger.info("Writing out data to '%s'.", par["output"]) + logger.info("Writing out data to '%s' with compression '%s'.", + par["output"], par["output_compression"]) input_data.write_h5mu(par["output"], compression=par["output_compression"]) + logger.info("Finished") if __name__ == '__main__': main() \ No newline at end of file