# Finalise source for transfer

In [7]:
import os
from pathlib import Path
import zarr
import dask.array as da
from tqdm.notebook import tqdm
from lamin_utils import logger

import joblib
from contextlib import contextmanager

# Root directory from which the other paths in this notebook are derived.
# NOTE(review): hard-coded absolute path — adjust before running on another machine.
source_results_path = Path("/ictstr01/groups/ml01/projects/2023_ttreis_segment_JUMP/snakemake/final_source08/snakemake/results/aggregated")


## 1) Rename the cellpose folder

In [16]:
# Location where a directory still named "cellpose" may be present.
suspected_path = (
    source_results_path
    / "broad_compressed/cellpainting-gallery/cpg0016-jump/source_8/workspace/segmentation/cellpose"
)

# Sibling path that carries the new name "cellpose_202404".
renamed_path = suspected_path.with_name("cellpose_202404")

# Rename only when the old directory is actually there; otherwise do nothing.
if suspected_path.exists():
    logger.warning("cellpose folder needs to be renamed")
    suspected_path.rename(renamed_path)
    logger.info("'cellpose' renamed to 'cellpose_202404'")

❗ cellpose folder needs to be renamed
💡 'cellpose' renamed to 'cellpose_202404'


## 2) Create model info

In [18]:
# "model" sub-directory inside the "cellpose_202404" folder.
model_info_path = suspected_path.with_name("cellpose_202404") / "model"

# Create the directory (including missing parents) only when it is absent,
# logging before and after the creation.
model_dir_missing = not model_info_path.exists()
if model_dir_missing:
    logger.warning("model info folder does not exist")
    model_info_path.mkdir(parents=True)
    logger.info("model info folder created")

❗ model info folder does not exist
💡 model info folder created


In [22]:
# Text written verbatim to README.md in the model info folder.
README_content = """# Notes

These files were created using a Cellpose-based snakemake pipeline. More
information can be found here: https://github.com/theislab/jump-cpg0016-segmentation

## Relevant software versions:
- https://github.com/theislab/jump-cpg0016-segmentation@v0.1.0
- cellpose=2.2.3=pyhd8ed1ab_0
- sparcscore==1.0.0

## Literature
- "Cellpose: a generalist algorithm for cellular segmentation", Stringer et al.,
  2021, https://www.nature.com/articles/s41592-020-01018-x
- "Three million images and morphological profiles of cells treated with
  matched chemical and genetic perturbations", Chandrasekaran et al., 2024, https://www.biorxiv.org/content/10.1101/2022.01.05.475090v3
- "SPARCS, a platform for genome-scale CRISPR screening for spatial cellular
  phenotypes", Schmacke et al., 2023, https://www.biorxiv.org/content/10.1101/2023.06.01.542416v1
"""

readme_path = model_info_path / "README.md"

# An existing README is deleted first (with a warning) and then written anew.
if readme_path.exists():
    logger.warning("README.md already exists, overwriting")
    readme_path.unlink()

readme_path.write_text(README_content)

❗ README.md already exists, overwriting


## 3) Create channel_mapping.json files

In [36]:
# Write a channel_mapping.json into every directory found two levels below
# "cellpose_202404/objects" (first level: batch, second level: plate).
data_path = suspected_path.parent / "cellpose_202404" / "objects"
batch_paths = [f for f in data_path.glob("*") if f.is_dir()]
plate_paths = [f for batch_path in batch_paths for f in batch_path.glob("*") if f.is_dir()]

# JSON object mapping each channel index (as a string key) to a channel name.
channel_mapping_contents = '{"0": "NucleusMask", "1": "CellMask", "2": "DNA", "3": "AGP", "4": "ER", "5": "Mito", "6": "RNA"}'

for plate_path in plate_paths:
    channel_mapping_path = plate_path / "channel_mapping.json"

    if channel_mapping_path.exists():
        logger.warning("channel_mapping.json already exists, overwriting")
        # Remove the file that was just checked, i.e. the one inside this
        # plate directory. Removing a file under the model info folder
        # instead would raise FileNotFoundError when that file is absent,
        # or delete an unrelated file when it is present.
        os.remove(channel_mapping_path)

    with open(channel_mapping_path, "w") as f:
        f.write(channel_mapping_contents)
