# Exploring the trainbow hNILs

## 1. Environment set up

Read in the required libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import trainbow.utils as utils
import trainbow.visualizations.image_viewers as viz
import trainbow.batch.segmenters as seg
import os

2022-09-02 14:44:10,291 [INFO] WRITING LOG OUTPUT TO /home/ec2-user/.cellpose/run.log


Set up the experiment-specific information (plate id, channel map and well map), then run segmentation and feature extraction for the plate

In [2]:
# --- Plate 1 configuration -------------------------------------------------
plate_id = 'PB2454'  # plate id
microscope_id = 6  # techdev scope or nikon 4

# Fluorescent channel map: channel name -> integer channel index.
channel_map = {
    'DAPI': 1,
    'eGFP': 2,
    'mOrange': 0,
    'mKate2': 3,
    'DPC_top': 4,
    'DPC_bottom': 5,
    'DPC_left': 6,
    'DPC_right': 7,
}

# Condition label -> list of well locations on this plate.
well_map = {
    "Mix": ['A01', 'A02'],
    "Control": ['A03', 'A04'],

    "CA137_0ng": ['B01'],
    "CA137_250ng": ['B02'],
    "CA137_500ng": ['B03'],
    "CA137_1000ng": ['B04'],

    "CM137_0ng": ['C01'],
    "CM137_250ng": ['C02'],
    "CM137_500ng": ['C03'],
    "CM137_1000ng": ['C04'],
}

# Output location: <bucket>/saradha/<plate_id>
bucket = 's3://insitro-user/'
output_dir = os.path.join('saradha/', plate_id)
plate_output_uri = os.path.join(bucket, output_dir)

# --- Acquisition table -----------------------------------------------------
experiment_acquisition = utils.database_utils.create_acquistion_df(plate_id, microscope_id)
# Drop duplicate file paths, keeping only the last row for each path.
experiment_acquisition = experiment_acquisition.drop_duplicates(subset='file_path', keep="last")

# Keyword arguments shared by every batch step below.
batch_kwargs = dict(
    channel_map=channel_map,
    num_cpus=10,
    output_dir=plate_output_uri,
)

# --- Instance segmentation -------------------------------------------------
# NOTE(review): the output recorded for this cell ends in a
# "DefaultCPUAllocator: can't allocate memory" RuntimeError while masks were
# being computed; the worker count may need revisiting - TODO confirm.
nuc_mask_paths = seg.segment_nuclei_batch(acquisition_df=experiment_acquisition, **batch_kwargs)
cell_mask_paths = seg.segment_cells_from_nuclei_batch(acquisition_df=experiment_acquisition, **batch_kwargs)

# --- Per-cell features -----------------------------------------------------
# NOTE(review): extract_features_batch is not imported in the visible import
# cell - confirm where it is defined before running on a fresh kernel.
feat_paths = extract_features_batch(acquisition_df=experiment_acquisition, **batch_kwargs)

# Gather every per-file feature table listed under <output_dir>/brainbow_features
# and stack them into one frame.
feature_file_paths = utils.database_utils.get_file_list(os.path.join(output_dir, "brainbow_features"))
per_file_features = [
    utils.database_utils.load_obj(os.path.join(bucket, feature_path))
    for feature_path in feature_file_paths
]
features = pd.concat(per_file_features)

# Save the combined features frame, then read it back from the saved object.
cell_features_uri = os.path.join(plate_output_uri, "cellular_brainbow_features.pkl")
utils.database_utils.save_object(features, cell_features_uri)
cell_features_plate1 = utils.database_utils.load_obj(cell_features_uri)

# --- Image (FOV) level features ---------------------------------------------
# NOTE(review): extract_features_batch_FOV is likewise not imported in the
# visible import cell - confirm its origin.
image_features = extract_features_batch_FOV(acquisition_df=experiment_acquisition, **batch_kwargs)

image_features_uri = os.path.join(plate_output_uri, "image_brainbow_features.pkl")
utils.database_utils.save_object(image_features, image_features_uri)
img_features_plate1 = utils.database_utils.load_obj(image_features_uri)


For the plate id PB2454, 1 unique measurements were found
Using 10 cpus
2022-09-02 14:44:17,257 [INFO] >>>> using CPU
2022-09-02 14:44:17,365 [INFO] >>>> using CPU
2022-09-02 14:44:17,370 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 14:44:17,399 [INFO] >>>> using CPU
2022-09-02 14:44:17,424 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 14:44:17,506 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 14:44:17,519 [INFO] >>>> using CPU
2022-09-02 14:44:17,535 [INFO] >>>> using CPU
2022-09-02 14:44:17,539 [INFO] >>>> using CPU
2022-09-02 14:44:17,585 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 14:44:17,600 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 14:44:17,628 [INFO] >>>> using CPU
2022-09-02 14:44:17,639 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 14:44:17,699 [INFO] >>>> using CPU
2022-09-02 14:44:17,719 [INFO] >>>> using CPU
2022-09-02 14:44:17,728 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 14:44:17,769 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 14:44:17,886 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 14:44:17,957 [INFO] >>>> u

2022-09-02 15:00:13,684 [INFO] >>>> using CPU
2022-09-02 15:00:14,105 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 15:00:16,287 [INFO] >>>> TOTAL TIME 192.04 sec
2022-09-02 15:00:22,733 [INFO] >>>> using CPU
2022-09-02 15:00:23,176 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 15:00:35,130 [INFO] >>>> TOTAL TIME 197.17 sec
2022-09-02 15:00:41,739 [INFO] >>>> using CPU
2022-09-02 15:00:42,101 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 15:00:54,032 [INFO] >>>> TOTAL TIME 195.87 sec
2022-09-02 15:01:00,122 [INFO] >>>> using CPU
2022-09-02 15:01:00,477 [INFO] >>>> TOTAL TIME 197.02 sec
2022-09-02 15:01:00,517 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 15:01:07,130 [INFO] >>>> using CPU
2022-09-02 15:01:07,526 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 15:01:10,117 [INFO] >>>> TOTAL TIME 193.94 sec
2022-09-02 15:01:17,422 [INFO] >>>> using CPU
2022-09-02 15:01:17,900 [INFO] ~~~ FINDING MASKS ~~~
2022-09-02 15:01:27,990 [INFO] >>>> TOTAL TIME 183.45 sec
2022-09-02 15:01:34,891 [INFO] >>>> using CPU
2022-09-02 1

RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
  File "/home/ec2-user/miniconda3/envs/insitro/lib/python3.9/site-packages/torch/utils/mkldnn.py", line 171, in forward
    @torch.jit.script_method
    def forward(self, x):
        return torch.batch_norm(
               ~~~~~~~~~~~~~~~~ <--- HERE
            x,
            self.weight,
RuntimeError: [enforce fail at CPUAllocator.cpp:68] . DefaultCPUAllocator: can't allocate memory: you tried to allocate 102760448 bytes. Error code 12 (Cannot allocate memory)


In [None]:
# --- Plate 2 configuration -------------------------------------------------
plate_id = 'PB2455' #plate id
microscope_id = 6 # techdev scope or nikon 4

# Fluorescent channel map: channel name -> integer channel index.
channel_map ={
    'DAPI':1,
    'eGFP':2,
    'mOrange':0,
    'mKate2':3,
    'DPC_top':4,
    'DPC_bottom':5,
    'DPC_left':6,
    'DPC_right':7
}

# Condition label -> list of well locations on this plate.
well_map = {
            "CD118_0ng" : ['A01'],
            "CD118_250ng" : ['A02'],
            "CD118_500ng" : ['A03'],
            "CD118_1000ng" : ['A04'],

            "CM130_0ng" : ['B01'],
            "CM130_250ng" : ['B02'],
            "CM130_500ng" : ['B03'],
            "CM130_1000ng" : ['B04'],

            "Cre-TAT" : ['C01'],
            "Mix" : ['C02','C03','C04'],
}

# Output location: <bucket>/saradha/<plate_id>
bucket = 's3://insitro-user/'
output_dir = os.path.join('saradha/',plate_id)
plate_output_uri = os.path.join(bucket,output_dir)

experiment_acquisition_p2 = utils.database_utils.create_acquistion_df(plate_id,microscope_id)
#drop duplicate file paths - keeping only the last row
experiment_acquisition_p2 = experiment_acquisition_p2.drop_duplicates(subset='file_path', keep="last")

## instance segmentation
# Work on a per-well subsample of the acquisition table.
# - The sample size is capped at the number of rows available in the well so
#   that a well with fewer than `n_fovs_per_well` rows no longer raises.
# - A fixed seed makes the subset identical across kernel restarts.
n_fovs_per_well = 10
sample_seed = 0
subset_df_p2 = (
    experiment_acquisition_p2
    .groupby('well_loc')
    .apply(lambda well_df: well_df.sample(n=min(len(well_df), n_fovs_per_well),
                                          random_state=sample_seed))
    .reset_index(drop=True)
)

nuc_mask_paths = seg.segment_nuclei_batch(acquisition_df = subset_df_p2,
                                          channel_map = channel_map,
                                          num_cpus = 10,
                                          output_dir = plate_output_uri
                                          )
cell_mask_paths = seg.segment_cells_from_nuclei_batch(acquisition_df = subset_df_p2,
                                                      channel_map = channel_map,
                                                      num_cpus = 10,
                                                      output_dir = plate_output_uri)

# NOTE(review): extract_features_batch / extract_features_batch_FOV are not
# imported in the visible import cell - confirm where they are defined.
feat_paths = extract_features_batch(acquisition_df = subset_df_p2,
                                  channel_map = channel_map,
                                  num_cpus = 10,
                                  output_dir = plate_output_uri)

#save the features df
# Stack every per-file feature table listed under <output_dir>/brainbow_features.
features = pd.concat([utils.database_utils.load_obj(os.path.join(bucket,path))
              for path in utils.database_utils.get_file_list(os.path.join(output_dir,"brainbow_features"))])

utils.database_utils.save_object(features,os.path.join(plate_output_uri,"cellular_brainbow_features.pkl"))
cell_features_plate2 = utils.database_utils.load_obj(os.path.join(plate_output_uri,"cellular_brainbow_features.pkl"))

# Image-level features must come from THIS plate's acquisition rows. The
# previous version passed `experiment_acquisition` (plate PB2454), which would
# have written plate-1 image features into the plate-2 output directory.
# The same subsample as the segmentation/feature steps above is used here.
image_features = extract_features_batch_FOV(acquisition_df = subset_df_p2,
                                      channel_map = channel_map,
                                      num_cpus = 10,
                                      output_dir = plate_output_uri)
utils.database_utils.save_object(image_features,os.path.join(plate_output_uri,"image_brainbow_features.pkl"))
img_features_plate2 = utils.database_utils.load_obj(os.path.join(plate_output_uri,"image_brainbow_features.pkl"))


In [None]:
# Load the plate-1 (PB2454) cellular features by explicit plate id.
# `output_dir` is reassigned to the PB2455 directory by the plate-2 cell, so
# relying on it here would load plate-2 features under a plate-1 name when the
# notebook is run top to bottom.
features_plate1 = utils.database_utils.load_obj(
    os.path.join(bucket, 'saradha/', 'PB2454', "cellular_brainbow_features.pkl")
)
