In [1]:
import os
from os import listdir
from os.path import isfile, join

import pandas as pd
from itertools import islice
import numpy as np

from skimage.external import tifffile
from skimage.external.tifffile import imread

import matplotlib.pyplot as plt

import ipywidgets as widgets

import random

from scipy.ndimage.morphology import binary_erosion as br
from skimage import morphology as skmor

from scipy import ndimage
from PIL import Image, ImageDraw

import numpy.ma as ma

from skimage.measure import label, regionprops

## MCM signals to calculate
1.	nucleus_MCM_total
2.	outer_MCM_total
3.	inner_MCM_total
4.	nucleus_MCM_hetChrom
5.	outer_MCM_hetChrom
6.	inner_MCM_hetChrom
7.	nucleus_MCM_euChrom
8.	outer_MCM_euChrom
9.	inner_MCM_euChrom


# 20191202 dataset

In [2]:
# Root folder of the 20191202 dataset.
bigDir = r'Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20191202_new dataset'

In [3]:
# Per-cell table of the 20191202 dataset (one row per cell image).
file = bigDir + '\\signals_new_v5.csv'
data = pd.read_csv(file)

# 20200217 geminin dataset

In [4]:
# Root folder of the 20200217 geminin dataset.
bigDir = r'Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200217_geminin'

In [5]:
# Per-cell table of the 20200217 geminin dataset.
file2 = bigDir + '\\200217_cellinfo_v8.csv'
data2 = pd.read_csv(file2)

# 20200207 new dataset

In [2]:
# Root folder and per-cell table of the 20200207 dataset.
bigDir = r'Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200207_new data'
file3 = bigDir + '\\cellinfo_200203_v10.csv'
data3 = pd.read_csv(file3)

# mcm calculations

In [3]:
def calculate_mcm_signals(name, data, percentage, start=0):
    """Measure the MCM signal and mask volume of every cell listed in ``data``.

    For each row the image at ``row.path`` is read. For each of nine masks
    (nucleus / inner / outer, each for total / heterochromatin / euchromatin)
    the MCM channel is set to zero outside the mask and summed. The results
    are written back into ``data`` *in place* as 18 columns:

    * ``{nuc,inner,outer}_mcm_{total,het,eu}`` - summed MCM intensity
    * ``{nuc,inner,outer}_vol_{total,het,eu}`` - number of True mask elements

    Parameters
    ----------
    name : str
        ``"new"`` selects channel 0 as the MCM channel; any other value
        selects channel 1.
    data : pandas.DataFrame
        Table with a ``path`` column holding the image path of each cell.
        Mask paths are derived from it by replacing ``'data_tiff'`` with the
        mask directory name and ``'.tif'`` with the mask file suffix.
    percentage : int
        Inserted into the heterochromatin/euchromatin mask directory names
        (``segmentation_{percentage}``, ``segmentation_{percentage}_eu``, ...).
    start : int, optional
        Number of leading rows to skip, e.g. to resume an interrupted run.
        Defaults to 0 (process every row).

    Returns
    -------
    None
        ``data`` is modified in place.

    Raises
    ------
    Exception
        Whatever ``imread`` raises when neither the primary nor the fallback
        path of an image or mask can be read.
    """
    ## if the given dataset is new, the mcm channel is 0
    ## else, mcm channel is 1 (for original dataset)
    mcm = 0 if name == "new" else 1

    ### for the column names
    pixels = ['total', 'het', 'eu']
    components = ['nuc', 'inner', 'outer']

    ### mask directories and file suffixes; entry (k*3 + j) belongs to
    ### pixels[k] / components[j]
    dirs = ['segmentation_nucleus_Otsu', 'erosion_nuc_mask', 'erosion_nuc_mask',
            f'segmentation_{percentage}', f'segmentation_{percentage}_erosion', f'segmentation_{percentage}_erosion',
            f'segmentation_{percentage}_eu', f'segmentation_{percentage}_eu_erosion', f'segmentation_{percentage}_eu_erosion']

    # NOTE(review): the two erosion-mask suffixes carry a fixed "_20" that does
    # not follow `percentage` - confirm this is the intended file naming.
    files = ['_nucleus.tif', '_eroded_9^5_inner_mask_20.tif', '_eroded_9^5_outer_mask_20.tif',
             '_hetChrom.tif', '_hetChrom_inner.tif', '_hetChrom_outer.tif',
             '_euChrom.tif', '_euChrom_inner.tif', '_euChrom_outer.tif']

    # Columns are written in this fixed order (volumes before signals within
    # each pixel class) so that newly created columns keep the same order in
    # `data` - and in any CSV saved from it - as before.
    out_columns = []
    for pix in pixels:
        out_columns += [f'{comp}_vol_{pix}' for comp in components]
        out_columns += [f'{comp}_mcm_{pix}' for comp in components]

    progBar = widgets.IntProgress(
        value=start,
        min=0,
        max=len(data),
        step=1,
        description='Progress:',
        orientation='horizontal'
    )
    display(progBar)

    # i = index label of the row, myCell = a copy of that row
    for i, myCell in islice(data.iterrows(), start, None):
        # Disabled override, only ever meant for the 20200217_geminin dataset:
        #     percentage = percentage if myCell.group == "CTL" else 20
        # Re-enabling it requires rebuilding `dirs` per row.

        ## opening cell image; retry with a '.tiff' extension if that fails
        cell_path = myCell['path']
        try:
            myImage = imread(cell_path)
        except Exception:
            cell_path = cell_path.replace('.tif', '.tiff')
            myImage = imread(cell_path)

        # Axis 1 is indexed as the channel axis; the image must be 4-D.
        # presumably (z, channel, y, x) - TODO confirm
        channel = myImage[:, mcm, :, :]

        measurements = {}
        for k, pix in enumerate(pixels):
            for j, comp in enumerate(components):
                idx = k * 3 + j

                ## opening mask image
                mask_path = cell_path.replace('data_tiff', dirs[idx])
                mask_path = mask_path.replace('.tif', files[idx])
                try:
                    mask = imread(mask_path)
                except Exception:
                    # NOTE(review): this fallback only rewrites paths ending
                    # in '_nucleus.tiff'; for every other mask the same path
                    # is simply retried - confirm against the files on disk.
                    mask_path = mask_path.replace('_nucleus.tiff', files[idx])
                    mask = imread(mask_path)

                ## changing mask type to binary
                mask = mask.astype(bool)

                ## masking a private copy of the MCM channel only, so the
                ## loaded image stays untouched for the next mask
                masked = channel.copy()
                masked[~mask] = 0

                ## signal: sum over the last two axes, then over the rest.
                ## The final sum goes through pandas, as the original did via
                ## its scratch DataFrame, so results match previous runs.
                per_slice = masked.sum(-1).sum(-1)
                measurements[f'{comp}_mcm_{pix}'] = pd.Series(per_slice).sum()

                ## volume: number of True elements in the mask
                measurements[f'{comp}_vol_{pix}'] = np.sum(mask)

        ## saving the calculated signals to the original/big dataframe
        for col in out_columns:
            data.loc[i, col] = measurements[col]

        progBar.value = progBar.value + 1
    
    
        

In [10]:
# 20191202 dataset, het/eu masks taken from the `segmentation_10*` directories.
calculate_mcm_signals(name="new", data=data, percentage=10)

IntProgress(value=0, description='Progress:', max=276)

In [11]:
# Spot check on row 69: heterochromatin mask volume as a fraction of the nuclear mask volume.
(data['nuc_vol_het'] / data['nuc_vol_total'])[69]

0.10002302696374224

In [12]:
# Save the 10% results next to the input CSV, under the "v6" file name.
data.to_csv(path_or_buf=file.replace("v5", "v6"))

In [13]:
## Consistency check: in each compartment (nucleus, inner, outer) the
## heterochromatin and euchromatin MCM signals should add up to the total,
## so all three printed ratios are expected to be ~1.
print(
    (data.nuc_mcm_het + data.nuc_mcm_eu).sum() / data.nuc_mcm_total.sum(),
    (data.inner_mcm_het.mean() + data.inner_mcm_eu.mean()) / data.inner_mcm_total.mean(),
    (data.outer_mcm_het + data.outer_mcm_eu).sum() / data.outer_mcm_total.sum(),
)

1.0000000019431863 0.9999999990927403 1.0000000058301468


In [14]:
## Consistency check: inner + outer should reproduce the whole-nucleus signal
## (het, eu and total), and het + eu should reproduce the nuclear total,
## so all four printed ratios are expected to be ~1.
print(
    (data.inner_mcm_het + data.outer_mcm_het).sum() / data.nuc_mcm_het.sum(),
    (data.outer_mcm_eu.mean() + data.inner_mcm_eu.mean()) / data.nuc_mcm_eu.mean(),
    (data.nuc_mcm_het.sum() + data.nuc_mcm_eu.sum()) / data.nuc_mcm_total.sum(),
    (data.inner_mcm_total + data.outer_mcm_total).sum() / data.nuc_mcm_total.sum(),
)

1.0000000024849314 0.9999999998523657 1.0000000019431863 0.9999999976557731


In [15]:
# 20191202 dataset again, now with the `segmentation_50*` directories; overwrites the 10% columns in `data`.
calculate_mcm_signals(name="new", data=data, percentage=50)

IntProgress(value=0, description='Progress:', max=276)

In [16]:
# Spot check on row 69: heterochromatin mask volume as a fraction of the nuclear mask volume.
(data['nuc_vol_het'] / data['nuc_vol_total'])[69]

0.500173395811312

In [17]:
# Save the 50% results next to the input CSV, under the "v6_50" file name.
data.to_csv(path_or_buf=file.replace("v5", "v6_50"))

In [18]:
## Consistency check: in each compartment (nucleus, inner, outer) the
## heterochromatin and euchromatin MCM signals should add up to the total,
## so all three printed ratios are expected to be ~1.
print(
    (data.nuc_mcm_het + data.nuc_mcm_eu).sum() / data.nuc_mcm_total.sum(),
    (data.inner_mcm_het.mean() + data.inner_mcm_eu.mean()) / data.inner_mcm_total.mean(),
    (data.outer_mcm_het + data.outer_mcm_eu).sum() / data.outer_mcm_total.sum(),
)

1.0000000032414003 0.999999999346692 1.0000000047508724


In [19]:
## Consistency check: inner + outer should reproduce the whole-nucleus signal
## (het, eu and total), and het + eu should reproduce the nuclear total,
## so all four printed ratios are expected to be ~1.
print(
    (data.inner_mcm_het + data.outer_mcm_het).sum() / data.nuc_mcm_het.sum(),
    (data.outer_mcm_eu.mean() + data.inner_mcm_eu.mean()) / data.nuc_mcm_eu.mean(),
    (data.nuc_mcm_het.sum() + data.nuc_mcm_eu.sum()) / data.nuc_mcm_total.sum(),
    (data.inner_mcm_total + data.outer_mcm_total).sum() / data.nuc_mcm_total.sum(),
)

0.9999999959318131 1.0000000013997972 1.0000000032414003 0.9999999976557731


In [22]:
# 20200217 geminin dataset, het/eu masks taken from the `segmentation_10*` directories.
calculate_mcm_signals(name="new", data=data2, percentage=10)

IntProgress(value=0, description='Progress:', max=145)

In [24]:
# Spot check on row 69: heterochromatin mask volume as a fraction of the nuclear mask volume.
(data2['nuc_vol_het'] / data2['nuc_vol_total'])[69]

0.10000872577175657

In [25]:
# Save the 10% results next to the input CSV, under the "v9_10" file name.
data2.to_csv(path_or_buf=file2.replace("v8", "v9_10"))

In [26]:
## Consistency check: in each compartment (nucleus, inner, outer) the
## heterochromatin and euchromatin MCM signals should add up to the total,
## so all three printed ratios are expected to be ~1.
print(
    (data2.nuc_mcm_het + data2.nuc_mcm_eu).sum() / data2.nuc_mcm_total.sum(),
    (data2.inner_mcm_het.mean() + data2.inner_mcm_eu.mean()) / data2.inner_mcm_total.mean(),
    (data2.outer_mcm_het + data2.outer_mcm_eu).sum() / data2.outer_mcm_total.sum(),
)

1.0 1.0 1.0


In [27]:
## Consistency check: inner + outer should reproduce the whole-nucleus signal
## (het, eu and total), and het + eu should reproduce the nuclear total,
## so all four printed ratios are expected to be ~1.
print(
    (data2.inner_mcm_het + data2.outer_mcm_het).sum() / data2.nuc_mcm_het.sum(),
    (data2.outer_mcm_eu.mean() + data2.inner_mcm_eu.mean()) / data2.nuc_mcm_eu.mean(),
    (data2.nuc_mcm_het.sum() + data2.nuc_mcm_eu.sum()) / data2.nuc_mcm_total.sum(),
    (data2.inner_mcm_total + data2.outer_mcm_total).sum() / data2.nuc_mcm_total.sum(),
)

1.0 1.0000000000000002 1.0 1.0


In [28]:
# 20200217 geminin dataset again, now with the `segmentation_50*` directories; overwrites the 10% columns in `data2`.
calculate_mcm_signals(name="new", data=data2, percentage=50)

IntProgress(value=0, description='Progress:', max=145)

In [29]:
# Spot check on row 69: heterochromatin mask volume as a fraction of the nuclear mask volume.
(data2['nuc_vol_het'] / data2['nuc_vol_total'])[69]

0.5001119175073125

In [30]:
# Save the 50% results next to the input CSV, under the "v9_50" file name.
data2.to_csv(path_or_buf=file2.replace("v8", "v9_50"))

In [31]:
## Consistency check: in each compartment (nucleus, inner, outer) the
## heterochromatin and euchromatin MCM signals should add up to the total,
## so all three printed ratios are expected to be ~1.
print(
    (data2.nuc_mcm_het + data2.nuc_mcm_eu).sum() / data2.nuc_mcm_total.sum(),
    (data2.inner_mcm_het.mean() + data2.inner_mcm_eu.mean()) / data2.inner_mcm_total.mean(),
    (data2.outer_mcm_het + data2.outer_mcm_eu).sum() / data2.outer_mcm_total.sum(),
)

1.0 1.0 1.0


In [32]:
## Consistency check: inner + outer should reproduce the whole-nucleus signal
## (het, eu and total), and het + eu should reproduce the nuclear total,
## so all four printed ratios are expected to be ~1.
print(
    (data2.inner_mcm_het + data2.outer_mcm_het).sum() / data2.nuc_mcm_het.sum(),
    (data2.outer_mcm_eu.mean() + data2.inner_mcm_eu.mean()) / data2.nuc_mcm_eu.mean(),
    (data2.nuc_mcm_het.sum() + data2.nuc_mcm_eu.sum()) / data2.nuc_mcm_total.sum(),
    (data2.inner_mcm_total + data2.outer_mcm_total).sum() / data2.nuc_mcm_total.sum(),
)

1.0 1.0 1.0 1.0


## 200207 new data

In [4]:
# 20200207 dataset, het/eu masks taken from the `segmentation_10*` directories.
calculate_mcm_signals(name="new", data=data3, percentage=10)

IntProgress(value=0, description='Progress:', max=146)

In [5]:
# Spot check on row 69: heterochromatin mask volume as a fraction of the nuclear mask volume.
(data3['nuc_vol_het'] / data3['nuc_vol_total'])[69]

0.10001578264601835

In [6]:
# Save the 10% results next to the input CSV, under the "v10_10" file name.
data3.to_csv(path_or_buf=file3.replace("v10", "v10_10"))

In [7]:
## Consistency check: in each compartment (nucleus, inner, outer) the
## heterochromatin and euchromatin MCM signals should add up to the total,
## so all three printed ratios are expected to be ~1.
print(
    (data3.nuc_mcm_het + data3.nuc_mcm_eu).sum() / data3.nuc_mcm_total.sum(),
    (data3.inner_mcm_het.mean() + data3.inner_mcm_eu.mean()) / data3.inner_mcm_total.mean(),
    (data3.outer_mcm_het + data3.outer_mcm_eu).sum() / data3.outer_mcm_total.sum(),
)

1.0 1.0 1.0


In [8]:
## Consistency check: inner + outer should reproduce the whole-nucleus signal
## (het, eu and total), and het + eu should reproduce the nuclear total,
## so all four printed ratios are expected to be ~1.
print(
    (data3.inner_mcm_het + data3.outer_mcm_het).sum() / data3.nuc_mcm_het.sum(),
    (data3.outer_mcm_eu.mean() + data3.inner_mcm_eu.mean()) / data3.nuc_mcm_eu.mean(),
    (data3.nuc_mcm_het.sum() + data3.nuc_mcm_eu.sum()) / data3.nuc_mcm_total.sum(),
    (data3.inner_mcm_total + data3.outer_mcm_total).sum() / data3.nuc_mcm_total.sum(),
)

1.0 0.9999999999999998 1.0 1.0


In [9]:
# 20200207 dataset again, now with the `segmentation_50*` directories; overwrites the 10% columns in `data3`.
calculate_mcm_signals(name="new", data=data3, percentage=50)

IntProgress(value=0, description='Progress:', max=146)

In [10]:
# Spot check on row 69: heterochromatin mask volume as a fraction of the nuclear mask volume.
(data3['nuc_vol_het'] / data3['nuc_vol_total'])[69]

0.5000558166749429

In [11]:
# Save the 50% results next to the input CSV, under the "v10_50" file name.
data3.to_csv(path_or_buf=file3.replace("v10", "v10_50"))

In [13]:
## Consistency check: in each compartment (nucleus, inner, outer) the
## heterochromatin and euchromatin MCM signals should add up to the total,
## so all three printed ratios are expected to be ~1.
print(
    (data3.nuc_mcm_het + data3.nuc_mcm_eu).sum() / data3.nuc_mcm_total.sum(),
    (data3.inner_mcm_het.mean() + data3.inner_mcm_eu.mean()) / data3.inner_mcm_total.mean(),
    (data3.outer_mcm_het + data3.outer_mcm_eu).sum() / data3.outer_mcm_total.sum(),
)

1.0 1.0 1.0


In [14]:
## Consistency check: inner + outer should reproduce the whole-nucleus signal
## (het, eu and total), and het + eu should reproduce the nuclear total,
## so all four printed ratios are expected to be ~1.
print(
    (data3.inner_mcm_het + data3.outer_mcm_het).sum() / data3.nuc_mcm_het.sum(),
    (data3.outer_mcm_eu.mean() + data3.inner_mcm_eu.mean()) / data3.nuc_mcm_eu.mean(),
    (data3.nuc_mcm_het.sum() + data3.nuc_mcm_eu.sum()) / data3.nuc_mcm_total.sum(),
    (data3.inner_mcm_total + data3.outer_mcm_total).sum() / data3.nuc_mcm_total.sum(),
)

1.0 1.0000000000000002 1.0 1.0
