# Example of CellProfiler project for idrstream

## Import Libraries

In [1]:
import pathlib
import pandas as pd
import shutil

from cellpose import core

import sys
sys.path.append("../")
from idrstream.CP_idr import CellProfilerRun

## Initialize idrstream

In [2]:
# CellProfiler pipeline file handed to CellProfilerRun
pipeline_path = pathlib.Path("example_files/CP_files/mitocheck_idr_cp.cppipe")
# CellProfiler plugins directory handed to CellProfilerRun
# NOTE(review): the original note here read "need to fill in on fig"; its meaning
# is unclear -- confirm whether this path still needs to be adjusted
plugins_directory = pathlib.Path("../idrstream/CP_Plugins/")
idr_id = "idr0013"
tmp_dir = pathlib.Path("../tmp/")
final_data_dir = pathlib.Path("../mitocheck_control_features/CP_features_negative_control")

# Remove the temporary directory left by a previous run, if there is one.
# Only a missing directory is treated as "nothing to do"; any other failure
# (e.g. a permission error) is raised instead of being reported as
# "No files to remove".
try:
    shutil.rmtree(tmp_dir)
    # uncomment the line below if you would like to remove the final data directory (e.g. all .csv.gz files)
    # shutil.rmtree(final_data_dir)
except FileNotFoundError:
    print("No files to remove")

stream = CellProfilerRun(
    pipeline_path,
    plugins_directory,
    idr_id,
    tmp_dir,
    final_data_dir,
    log='example_logs/cp_idrstream.log',
)

## Initialize CellProfiler metadata compiler

In [3]:
# input: TSV file listing the data to process
data_to_process_tsv = pathlib.Path("example_files/data_to_process.tsv")
# output: location handed to convert_tsv_to_csv as the save path
metadata_save_path = pathlib.Path("example_files/data_to_process.csv")

# presumably writes a CSV version of the TSV to metadata_save_path -- see CellProfilerRun.convert_tsv_to_csv
stream.convert_tsv_to_csv(data_to_process_tsv, metadata_save_path)

## Load in metadata

In [4]:
# Load the same TSV that was converted to CSV above; reusing data_to_process_tsv
# (instead of repeating the path literal) keeps the two from drifting apart.
# The first column of the file is used as the DataFrame index.
data_to_process = pd.read_csv(data_to_process_tsv, sep="\t", index_col=0)
data_to_process

Unnamed: 0,Plate,Well,Frames,Well Number,Control Type,Original Gene Target,Plate_Map_Name,Gene_Replicate,Site,DNA
0,LT0001_02,A15,43,15,negative control,negative control,LT0001_02_15,1,1,LT0001_02/LT0001_02_15_43.tif
1,LT0001_02,B2,46,26,negative control,negative control,LT0001_02_26,1,1,LT0001_02/LT0001_02_26_46.tif
2,LT0001_02,C15,52,63,negative control,negative control,LT0001_02_63,1,1,LT0001_02/LT0001_02_63_52.tif
3,LT0001_02,D2,31,74,negative control,negative control,LT0001_02_74,1,1,LT0001_02/LT0001_02_74_31.tif
4,LT0001_02,M16,34,304,negative control,negative control,LT0001_02_304,1,1,LT0001_02/LT0001_02_304_34.tif
...,...,...,...,...,...,...,...,...,...,...
3848,LT0603_06,D2,51,74,negative control,negative control,LT0603_06_74,1,1,LT0603_06/LT0603_06_74_51.tif
3849,LT0603_06,M16,52,304,negative control,negative control,LT0603_06_304,1,1,LT0603_06/LT0603_06_304_52.tif
3850,LT0603_06,N3,49,315,negative control,negative control,LT0603_06_315,1,1,LT0603_06/LT0603_06_315_49.tif
3851,LT0603_06,O16,47,352,negative control,negative control,LT0603_06_352,1,1,LT0603_06/LT0603_06_352_47.tif


## Initialize Aspera downloader

In [5]:
# find the path in terminal using `ascli config ascp show`
# find the path in terminal using `ascli config ascp show`
# The ascp binary lives under the current user's home directory, so the path is
# resolved from Path.home() rather than hard-coding one user's /home/<name>.
aspera_path = pathlib.Path.home() / ".aspera" / "ascli" / "sdk" / "ascp"
aspera_key_path = pathlib.Path("example_files/asperaweb_id_dsa.openssh")
screens_path = pathlib.Path("example_files/idr0013-screenA-plates.tsv")

stream.init_downloader(aspera_path, aspera_key_path, screens_path)

## Initialize Fiji preprocessor

In [6]:
# Fiji installation directory; resolved from the current user's home directory
# instead of a hard-coded /home/<name> so the notebook runs for other users.
# Adjust if Fiji.app is installed somewhere other than ~/Desktop.
fiji_path = pathlib.Path.home() / "Desktop" / "Fiji.app"
stream.init_preprocessor(fiji_path)

[INFO] Overriding Leica ROI Reader; identifier: command:de.biovoxxel.utilities.RoiReader; jar: file:/home/roshankern/Desktop/Fiji.app/plugins/Biovoxxel_Plugins-2.5.6.jar


## Copy and create CellProfiler files/folders

In [7]:
# Reuse the CSV path that was handed to stream.convert_tsv_to_csv as its save
# location, rather than repeating the "example_files/data_to_process.csv" literal.
metadata_path = metadata_save_path
stream.copy_CP_files(metadata_path)

## Confirm that GPU is activated for Cellpose to run

In [8]:
# ask cellpose whether it can use the GPU and report the answer
use_GPU = core.use_gpu()
print(f">>> GPU activated? {use_GPU}")
# NOTE(review): logger_setup is not imported in this notebook; the call below is
# left disabled -- import it before enabling, or delete the line
# logger_setup()

>>> GPU activated? True


## Run idrstream batches

In [9]:
# Run the stream on the loaded metadata with the batch settings below
# (batch size 1, starting at batch 0, batch numbers [0]).
stream.run_cp_stream(
    data_to_process,
    batch_size=1,
    start_batch=0,
    batch_nums=[0],
)

Completed: 87703K bytes transferred in 3 seconds
 (228543K bits/sec), in 1 file.
CellH5Reader initializing /home/roshankern/Desktop/Github/IDR_stream/tmp/downloads/LT0001_02/00015_01.ch5
Plate :/sample/0/plate/
Well :/sample/0/plate/LT0001_02--ex2005_11_16--sp2005_02_17--tt17--c3/experiment/
Site :/sample/0/plate/LT0001_02--ex2005_11_16--sp2005_02_17--tt17--c3/experiment/00015/position/
Parse segmentation ROIs for cell object primary__test : 0


Times reported are CPU and Wall-clock times for each module
Thu Feb 16 15:40:05 2023: Image # 1, module Images # 1: CPU_time = 0.00 secs, Wall_time = 0.00 secs
Thu Feb 16 15:40:05 2023: Image # 1, module Metadata # 2: CPU_time = 0.00 secs, Wall_time = 0.00 secs
Thu Feb 16 15:40:05 2023: Image # 1, module NamesAndTypes # 3: CPU_time = 0.42 secs, Wall_time = 0.11 secs
Thu Feb 16 15:40:05 2023: Image # 1, module Groups # 4: CPU_time = 0.00 secs, Wall_time = 0.00 secs
** TORCH CUDA version installed and working. **
>>>> using GPU
>> cyto << model set to be used
>>>> model diam_mean =  30.000 (ROIs rescaled to this size during training)
~~~ ESTIMATING CELL DIAMETER(S) ~~~
estimated cell diameter(s) in 2.92 sec
>>> diameter(s) = 
[ 27.03 ]
~~~ FINDING MASKS ~~~
>>>> TOTAL TIME 3.59 sec
Thu Feb 16 15:40:05 2023: Image # 1, module RunCellpose # 5: CPU_time = 4.80 secs, Wall_time = 4.62 secs
  back_pixels = skimage.morphology.erosion(back_pixels_mask, selem=selem)
  back_pixels = skimage.morpho