In [7]:
import os
from os import listdir
from os.path import isfile, join

import pandas as pd
from itertools import islice
import numpy as np

from skimage.external import tifffile
from skimage.external.tifffile import imread

import matplotlib.pyplot as plt

import ipywidgets as widgets

import random

from scipy.ndimage.morphology import binary_erosion as br
from skimage import morphology as skmor

from scipy import ndimage
from PIL import Image, ImageDraw

import numpy.ma as ma

from skimage.measure import label, regionprops

## to calculate
1.	nucleus_MCM_total
2.	outer_MCM_total
3.	inner_MCM_total
4.	nucleus_MCM_hetChrom
5.	outer_MCM_hetChrom
6.	inner_MCM_hetChrom
7.	nucleus_MCM_euChrom
8.	outer_MCM_euChrom
9.	inner_MCM_euChrom


# H3K9ME3 

In [2]:
# Root folder of the 2020-06-04 H3K9me3 dataset (hard-coded absolute Windows path).
bigDir=r'Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200604_h3k9me3'

In [3]:
# Load the per-cell table of this dataset from its CSV file.
# `file` is reused further down to derive the output file name.
file=f'{bigDir}\\cellinfo_200604v3.csv'
data=pd.read_csv(file)

# H3K9ME3 2nd dataset

In [2]:
# Root folder of the 2020-06-15 H3K9me3 dataset.
# NOTE: this rebinds `bigDir`; any later cell using `bigDir` sees this value.
bigDir=r'Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200615_h3k9me3'

In [8]:
# Load the per-cell table of the 2nd dataset; kept under separate names
# (`file2`, `data2`) so it does not replace `file` / `data`.
file2=f'{bigDir}\\cellinfo_200615v3.csv'
data2=pd.read_csv(file2)

In [9]:
# Preview the first rows of the 2nd dataset's table.
data2.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,file,original_name,movie,last frame movie position x,last frame movie position y,ab1 (MCM),ab1 ch,ab2 (heterochromatin),...,outer_vol_het,nuc_mcm_het,inner_mcm_het,outer_mcm_het,nuc_vol_eu,inner_vol_eu,outer_vol_eu,nuc_mcm_eu,inner_mcm_eu,outer_mcm_eu
0,0,0,20200615_cell_00.ciz,20200615_29_11_63x.czi,200614xy29,1781.995,159.93,H3K9Me3,1,MCM2,...,30994.0,398492000.0,159515917.0,238976105.0,193173.0,24960.0,168213.0,767247700.0,213462279.0,553785400.0
1,1,1,20200615_cell_01.ciz,20200615_29_18_63x.czi,200614xy29,1534.667,302.703,H3K9Me3,1,MCM2,...,29160.0,600020800.0,302931639.0,297089113.0,213273.0,18604.0,194669.0,1104509000.0,218566044.0,885942600.0
2,2,2,20200615_cell_02.ciz,20200615_29_23_63x.czi,200614xy29,558.209,366.997,H3K9Me3,1,MCM2,...,35870.0,928613700.0,407875170.0,520738572.0,227901.0,25767.0,202134.0,1606101000.0,427398661.0,1178703000.0
3,3,3,20200615_cell_03.ciz,20200615_29_24_63x.czi,200614xy29,1147.608,371.829,H3K9Me3,1,MCM2,...,32686.0,1303478000.0,471494573.0,831983523.0,191006.0,20367.0,170639.0,2438647000.0,641162396.0,1797484000.0
4,4,4,20200615_cell_04.ciz,20200615_29_31_63x.czi,200614xy29,1649.449,469.52,H3K9Me3,1,MCM2,...,25166.0,881289800.0,568747640.0,312542147.0,248842.0,24887.0,223955.0,1269950000.0,287470979.0,982478700.0


# H3K9ME3 3rd dataset with MgCl2

In [12]:
# Root folder and per-cell table of the 2020-08-29 dataset.
# NOTE(review): this rebinds `bigDir`, `file` and `data`. When the notebook is
# run top to bottom, every later cell that uses `file` or `data` refers to THIS
# dataset, not to the first one loaded above.
bigDir=r'Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200829_h3k9me3'

file=f'{bigDir}\\200819_cellinfo_v2.csv'
data=pd.read_csv(file)

# mcm calculations

In [41]:
def calculate_mcm_signals(name, data, percentage):
    """Measure masked MCM signal and mask volume for every cell in ``data``.

    For each row the image at ``row.path`` is read and one channel is selected
    as the MCM channel. Nine mask images are then read from directories and
    file names derived from that path: (nuc, inner, outer) x (total, het, eu).
    For every mask, two values are written back to the row:

    * ``<component>_mcm_<pixels>`` -- sum of the MCM channel inside the mask
    * ``<component>_vol_<pixels>`` -- number of non-zero mask elements

    Parameters
    ----------
    name : str
        ``"new"`` selects channel index 0 as MCM; any other value selects
        channel index 1.
    data : pandas.DataFrame
        One row per cell. Must provide ``path`` and ``group`` columns.
        The frame is modified IN PLACE.
    percentage : int
        Inserted into the het/eu segmentation directory names
        (``segmentation_<percentage>...``). Rows whose ``group`` equals
        ``"CTL"`` always use 20 instead; other rows are unaffected by that.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in as ``data`` (not a copy).

    Raises
    ------
    Exception
        Whatever ``imread`` raises when neither the primary nor the fallback
        path of an image or mask can be read.
    """
    ## if the given dataset is new, the mcm channel is 0
    ## else, mcm channel is 1 (for original dataset)
    mcm = 0 if name == "new" else 1

    progBar = widgets.IntProgress(
        value=0,
        min=0,
        max=len(data),
        step=1,
        description='Progress:',
        orientation='horizontal'
    )
    display(progBar)

    ### for the column names
    pixels = ['total', 'het', 'eu']
    components = ['nuc', 'inner', 'outer']

    ### mask file suffixes, ordered pixels-major / components-minor so that
    ### index k*3+j matches (pixels[k], components[j])
    files = ['_nucleus.tif', '_eroded_9^5_inner_mask_20.tif', '_eroded_9^5_outer_mask_20.tif',
             '_hetChrom.tif', '_hetChrom_inner.tif', '_hetChrom_outer.tif',
             '_euChrom.tif', '_euChrom_inner.tif', '_euChrom_outer.tif']

    for i, myCell in data.iterrows():
        # Control cells always use 20 %. This must be a per-row value:
        # assigning to `percentage` itself would force 20 % onto every row
        # processed after the first control cell.
        cell_percentage = 20 if myCell.group == "CTL" else percentage

        ## opening cell image (retry with a '.tiff' extension if the first read fails)
        cell_path = myCell.path
        try:
            myImage = imread(cell_path)
        except Exception:
            cell_path = cell_path.replace('.tif', '.tiff')
            myImage = imread(cell_path)

        # View on the MCM channel; it is only read below, so no copy is needed.
        # Indexing assumes a 4-D image with the channel on axis 1.
        mcm_channel = myImage[:, mcm, :, :]

        ### mask directories, same ordering as `files`
        seg = f'segmentation_{cell_percentage}'
        dirs = ['segmentation_nucleus_Otsu', 'erosion_nuc_mask', 'erosion_nuc_mask',
                seg, f'{seg}_erosion', f'{seg}_erosion',
                f'{seg}_eu', f'{seg}_eu_erosion', f'{seg}_eu_erosion']

        measurements = {}
        for k, pixel_class in enumerate(pixels):
            for j, component in enumerate(components):
                idx = k * len(components) + j

                ## opening mask image: swap the data directory for the mask
                ## directory, then the extension for the mask suffix
                mask_path = cell_path.replace('data_tiff', dirs[idx])
                mask_path = mask_path.replace('.tif', files[idx])
                try:
                    mask = imread(mask_path)
                except Exception:
                    # NOTE(review): this fallback only rewrites a
                    # '_nucleus.tiff' suffix; for paths that do not contain it
                    # the same path is simply retried -- confirm this is intended.
                    mask_path = mask_path.replace('_nucleus.tiff', files[idx])
                    mask = imread(mask_path)

                ## changing mask type to binary
                mask = mask.astype(bool)

                ## signal inside the mask; accumulate in float64 so the result
                ## does not depend on the integer width of the image dtype
                measurements[f'{component}_mcm_{pixel_class}'] = float(
                    mcm_channel[mask].sum(dtype=np.float64))

                ## volume = number of mask elements that are set
                measurements[f'{component}_vol_{pixel_class}'] = int(mask.sum())

        ## saving the calculated signals to original/big dataframe
        ## (per pixel class: the three volumes first, then the three signals)
        for pixel_class in pixels:
            for quantity in ('vol', 'mcm'):
                for component in components:
                    column = f'{component}_{quantity}_{pixel_class}'
                    data.loc[i, column] = measurements[column]

        progBar.value = progBar.value + 1

    return data
    
        

In [24]:
# Preview the first rows of the table currently bound to `data`.
data.head()

Unnamed: 0.1,Unnamed: 0,file,original cell name,movie,x,y,ab1 (MCM),ab1 ch,ab2 (heterochromatin),ab2 ch,...,outer_vol_het,nuc_mcm_het,inner_mcm_het,outer_mcm_het,nuc_vol_eu,inner_vol_eu,outer_vol_eu,nuc_mcm_eu,inner_mcm_eu,outer_mcm_eu
0,0,20200819_cell2_01.ciz,20200818xy19_6.czi,200819xy19,1526.899,85.295,H3k9me3,1,MCM2,2,...,23619.0,260522503.0,147526591.0,112995912.0,201124.0,20110.0,181014.0,510745670.0,98503562.0,412242108.0
1,1,20200819_cell2_02.ciz,20200818xy19_8.czi,200819xy19,1112.568,94.321,H3k9me3,1,MCM2,2,...,22230.0,84023553.0,32253902.0,51769651.0,140519.0,14303.0,126216.0,217423832.0,34718346.0,182705486.0
2,2,20200819_cell2_03.ciz,20200818xy19_22.czi,200819xy19,1412.269,290.826,H3k9me3,1,MCM2,2,...,22173.0,172845122.0,104295518.0,68549604.0,187288.0,14819.0,172469.0,386202258.0,61085885.0,325116373.0
3,3,20200819_cell2_04.ciz,20200818xy19_25.czi,200819xy19,1764.52,336.1,H3k9me3,1,MCM2,2,...,25455.0,73984324.0,25770708.0,48213616.0,151186.0,4945.0,146241.0,192542688.0,10283672.0,182259016.0
4,4,20200819_cell2_05.ciz,20200818xy19_27.czi,200819xy19,188.411,367.506,H3k9me3,1,MCM2,2,...,29935.0,92392864.0,51089171.0,41303693.0,254331.0,33198.0,221133.0,292689010.0,48335907.0,244353103.0


In [42]:
# Compute the measurements with percentage=10.
# calculate_mcm_signals writes into `data` and returns that same frame, so
# `data_10` is another name for `data`, not an independent copy.
data_10=calculate_mcm_signals("h3k9me3", data, 10)

IntProgress(value=0, description='Progress:', max=114)

In [44]:
# Per-cell ratio of the heterochromatin mask volume to the whole-nucleus mask volume.
# NOTE(review): the recorded output shows ~0.20 although percentage=10 was
# requested -- check how calculate_mcm_signals handles rows with group == "CTL".
data_10.nuc_vol_het/data_10.nuc_vol_total

0      0.200000
1      0.200024
2      0.200065
3      0.200040
4      0.200033
5      0.200033
6      0.200049
7      0.200038
8      0.200060
9      0.200082
10     0.200061
11     0.200002
12     0.200007
13     0.200016
14     0.200034
15     0.200004
16     0.200006
17     0.200023
18     0.200064
19     0.200012
20     0.200033
21     0.200021
22     0.200012
23     0.200022
24     0.200043
25     0.200027
26     0.200053
27     0.200020
28     0.200009
29     0.200032
         ...   
84     0.200030
85     0.200012
86     0.200016
87     0.200038
88     0.200006
89     0.200000
90     0.200003
91     0.200036
92     0.200011
93     0.200014
94     0.200010
95     0.200018
96     0.200024
97     0.200052
98     0.200039
99     0.200017
100    0.200031
101    0.200066
102    0.200001
103    0.200011
104    0.200017
105    0.200002
106    0.200031
107    0.200016
108    0.200005
109    0.200033
110    0.200009
111    0.200026
112    0.200014
113    0.200014
Length: 114, dtype: floa

In [17]:
# Recompute the measurements for each percentage and save one CSV per value.
# calculate_mcm_signals writes its results into `data` in place, so the frame
# has to be saved inside the loop, before the next pass overwrites the columns.
d = (10,20,30)
for i in d:
    calculate_mcm_signals("h3k9me3", data, i)
    # index=False: do not write the row index; otherwise every save/reload
    # cycle adds another "Unnamed: 0" column to the table.
    data.to_csv(f'{bigDir}\\200819_cellinfo_v3_{i}.csv', index=False)

IntProgress(value=0, description='Progress:', max=114)

IntProgress(value=0, description='Progress:', max=114)

IntProgress(value=0, description='Progress:', max=114)

### mcm calculations for h3k9me3 dataset

In [5]:
## running the function for the h3k9me3 dataset with percentage=10
## (the returned frame is `data` itself, modified in place)
data_10=calculate_mcm_signals ("h3k9me3", data,10) # name != "new" -> MCM is channel index 1 (2nd channel)

IntProgress(value=0, description='Progress:', max=78)

In [6]:
# Ratio of heterochromatin mask volume to nucleus mask volume, per cell.
# The 'temp' column is added to the frame itself, so it is also written out
# when this frame is saved later.
data_10['temp']=data_10.nuc_vol_het/data_10.nuc_vol_total
data_10['temp']

0     0.100020
1     0.100014
2     0.100013
3     0.100003
4     0.100028
        ...   
73    0.100022
74    0.100010
75    0.100004
76    0.100026
77    0.100029
Name: temp, Length: 78, dtype: float64

In [7]:
## testing to see if the calculated signals have results as expected:
## het + eu signal divided by total signal, for nuc, inner and outer in turn.
## Each printed ratio should be 1 (up to floating-point rounding).
print(
(np.sum(data_10['nuc_mcm_het']+data_10['nuc_mcm_eu']))/np.sum(data_10['nuc_mcm_total']),
(np.mean(data_10['inner_mcm_het'])+np.mean(data_10['inner_mcm_eu']))/np.mean(data_10['inner_mcm_total']),
(np.sum(data_10['outer_mcm_het']+data_10['outer_mcm_eu']))/np.sum(data_10['outer_mcm_total']))

1.0 1.0 1.0


In [8]:
## consistency checks, each printed ratio should be 1 (up to rounding):
## 1) het:   inner + outer vs. nuc
## 2) eu:    inner + outer vs. nuc
## 3) nuc:   het + eu      vs. total
## 4) total: inner + outer vs. nuc
print(
(np.sum(data_10['inner_mcm_het']+data_10['outer_mcm_het']))/np.sum(data_10['nuc_mcm_het']),
(np.mean(data_10['outer_mcm_eu'])+np.mean(data_10['inner_mcm_eu']))/np.mean(data_10['nuc_mcm_eu']),
(np.sum(data_10['nuc_mcm_het'])+np.sum(data_10['nuc_mcm_eu']))/np.sum(data_10['nuc_mcm_total']),
(np.sum(data_10['inner_mcm_total']+data_10['outer_mcm_total'])/np.sum(data_10['nuc_mcm_total'])))

1.0 1.0 1.0 1.0


In [9]:
## saving the dataset with calculated signals
## NOTE(review): the output name is derived from `file`. If `file` does not
## contain 'v3' (e.g. after it was rebound to another dataset's CSV), replace()
## returns the input path unchanged and the source CSV is overwritten.
## index=False: do not write the row index, so reloading the file does not add
## another "Unnamed: 0" column.
data_10.to_csv(file.replace('v3','v4_10percent'), index=False)

In [10]:
## running the function for the h3k9me3 dataset with percentage=50
## NOTE: the function writes into `data` in place and returns it, so `data_50`
## and any earlier result obtained from `data` are the same frame.
data_50=calculate_mcm_signals ("h3k9me3", data,50) # name != "new" -> MCM is channel index 1 (2nd channel)

IntProgress(value=0, description='Progress:', max=78)

In [11]:
# Ratio of heterochromatin mask volume to nucleus mask volume, per cell.
# Stored as a 'temp' column on the frame, so it is saved together with it.
data_50['temp']=data_50.nuc_vol_het/data_50.nuc_vol_total
data_50['temp']

0     0.500012
1     0.500009
2     0.500041
3     0.500030
4     0.500053
        ...   
73    0.500041
74    0.500035
75    0.500002
76    0.500068
77    0.500046
Name: temp, Length: 78, dtype: float64

In [12]:
## testing to see if the calculated signals have results as expected:
## het + eu signal divided by total signal, for nuc, inner and outer in turn.
## Each printed ratio should be 1 (up to floating-point rounding).
print(
(np.sum(data_50['nuc_mcm_het']+data_50['nuc_mcm_eu']))/np.sum(data_50['nuc_mcm_total']),
(np.mean(data_50['inner_mcm_het'])+np.mean(data_50['inner_mcm_eu']))/np.mean(data_50['inner_mcm_total']),
(np.sum(data_50['outer_mcm_het']+data_50['outer_mcm_eu']))/np.sum(data_50['outer_mcm_total']))

1.0 1.0 1.0


In [13]:
## consistency checks, each printed ratio should be 1 (up to rounding):
## 1) het:   inner + outer vs. nuc
## 2) eu:    inner + outer vs. nuc
## 3) nuc:   het + eu      vs. total
## 4) total: inner + outer vs. nuc
print(
(np.sum(data_50['inner_mcm_het']+data_50['outer_mcm_het']))/np.sum(data_50['nuc_mcm_het']),
(np.mean(data_50['outer_mcm_eu'])+np.mean(data_50['inner_mcm_eu']))/np.mean(data_50['nuc_mcm_eu']),
(np.sum(data_50['nuc_mcm_het'])+np.sum(data_50['nuc_mcm_eu']))/np.sum(data_50['nuc_mcm_total']),
(np.sum(data_50['inner_mcm_total']+data_50['outer_mcm_total'])/np.sum(data_50['nuc_mcm_total'])))

1.0 1.0 1.0 1.0


In [14]:
## saving the dataset with calculated signals
## NOTE(review): the output name is derived from `file`. If `file` does not
## contain 'v3' (e.g. after it was rebound to another dataset's CSV), replace()
## returns the input path unchanged and the source CSV is overwritten.
## index=False: do not write the row index, so reloading the file does not add
## another "Unnamed: 0" column.
data_50.to_csv(file.replace('v3','v4_50percent'), index=False)

### mcm calculations for h3k9me3 2nd dataset

In [11]:
## running the function for the 2nd h3k9me3 dataset with percentage=10
## (the returned frame is `data2` itself, modified in place)
data_10_2=calculate_mcm_signals ("h3k9me3", data2,10) # name != "new" -> MCM is channel index 1 (2nd channel)

IntProgress(value=0, description='Progress:', max=88)

In [12]:
# Show the `file` column of the processed table.
data_10_2.file

0     20200615_cell_00.ciz
1     20200615_cell_01.ciz
2     20200615_cell_02.ciz
3     20200615_cell_03.ciz
4     20200615_cell_04.ciz
              ...         
83    20200615_cell_83.ciz
84    20200615_cell_84.ciz
85    20200615_cell_85.ciz
86    20200615_cell_86.ciz
87    20200615_cell_87.ciz
Name: file, Length: 88, dtype: object

In [13]:
# Ratio of heterochromatin mask volume to nucleus mask volume, per cell.
# Stored as a 'temp' column on the frame, so it is saved together with it.
data_10_2['temp']=data_10_2.nuc_vol_het/data_10_2.nuc_vol_total
data_10_2['temp']

0     0.100000
1     0.100006
2     0.100004
3     0.100000
4     0.100007
        ...   
83    0.100001
84    0.100018
85    0.100007
86    0.100016
87    0.100003
Name: temp, Length: 88, dtype: float64

In [15]:
## testing to see if the calculated signals have results as expected:
## het + eu signal divided by total signal, for nuc, inner and outer in turn.
## Each printed ratio should be 1 (up to floating-point rounding).
print(
(np.sum(data_10_2['nuc_mcm_het']+data_10_2['nuc_mcm_eu']))/np.sum(data_10_2['nuc_mcm_total']),
(np.mean(data_10_2['inner_mcm_het'])+np.mean(data_10_2['inner_mcm_eu']))/np.mean(data_10_2['inner_mcm_total']),
(np.sum(data_10_2['outer_mcm_het']+data_10_2['outer_mcm_eu']))/np.sum(data_10_2['outer_mcm_total']))

1.0 0.9999999999999998 1.0


In [16]:
## consistency checks, each printed ratio should be 1 (up to rounding):
## 1) het:   inner + outer vs. nuc
## 2) eu:    inner + outer vs. nuc
## 3) nuc:   het + eu      vs. total
## 4) total: inner + outer vs. nuc
print(
(np.sum(data_10_2['inner_mcm_het']+data_10_2['outer_mcm_het']))/np.sum(data_10_2['nuc_mcm_het']),
(np.mean(data_10_2['outer_mcm_eu'])+np.mean(data_10_2['inner_mcm_eu']))/np.mean(data_10_2['nuc_mcm_eu']),
(np.sum(data_10_2['nuc_mcm_het'])+np.sum(data_10_2['nuc_mcm_eu']))/np.sum(data_10_2['nuc_mcm_total']),
(np.sum(data_10_2['inner_mcm_total']+data_10_2['outer_mcm_total'])/np.sum(data_10_2['nuc_mcm_total'])))

1.0 1.0 1.0 1.0


In [18]:
## saving the dataset with calculated signals
## index=False: do not write the row index, so reloading the file does not add
## another "Unnamed: 0" column.
data_10_2.to_csv(file2.replace('v3','v4_10percent'), index=False)

In [19]:
## running the function for the 2nd h3k9me3 dataset with percentage=50
## NOTE: this rebinds `data_50`; the function writes into `data2` in place and
## returns it, so `data_50` and `data_10_2` now refer to the same frame.
data_50=calculate_mcm_signals ("h3k9me3", data2,50) # name != "new" -> MCM is channel index 1 (2nd channel)

IntProgress(value=0, description='Progress:', max=88)

In [20]:
## testing to see if the calculated signals have results as expected:
## het + eu signal divided by total signal, for nuc, inner and outer in turn.
## Each printed ratio should be 1 (up to floating-point rounding).
print(
(np.sum(data_50['nuc_mcm_het']+data_50['nuc_mcm_eu']))/np.sum(data_50['nuc_mcm_total']),
(np.mean(data_50['inner_mcm_het'])+np.mean(data_50['inner_mcm_eu']))/np.mean(data_50['inner_mcm_total']),
(np.sum(data_50['outer_mcm_het']+data_50['outer_mcm_eu']))/np.sum(data_50['outer_mcm_total']))

1.0 1.0 1.0


In [21]:
# Ratio of heterochromatin mask volume to nucleus mask volume, per cell.
# Stored as a 'temp' column on the frame, so it is saved together with it.
data_50['temp']=data_50.nuc_vol_het/data_50.nuc_vol_total
data_50['temp']

0     0.500027
1     0.500021
2     0.500000
3     0.500002
4     0.500024
        ...   
83    0.500039
84    0.500027
85    0.500051
86    0.500023
87    0.500070
Name: temp, Length: 88, dtype: float64

In [22]:
## consistency checks, each printed ratio should be 1 (up to rounding):
## 1) het:   inner + outer vs. nuc
## 2) eu:    inner + outer vs. nuc
## 3) nuc:   het + eu      vs. total
## 4) total: inner + outer vs. nuc
print(
(np.sum(data_50['inner_mcm_het']+data_50['outer_mcm_het']))/np.sum(data_50['nuc_mcm_het']),
(np.mean(data_50['outer_mcm_eu'])+np.mean(data_50['inner_mcm_eu']))/np.mean(data_50['nuc_mcm_eu']),
(np.sum(data_50['nuc_mcm_het'])+np.sum(data_50['nuc_mcm_eu']))/np.sum(data_50['nuc_mcm_total']),
(np.sum(data_50['inner_mcm_total']+data_50['outer_mcm_total'])/np.sum(data_50['nuc_mcm_total'])))

1.0 0.9999999999999999 1.0 1.0


In [23]:
## saving the dataset with calculated signals
## index=False: do not write the row index, so reloading the file does not add
## another "Unnamed: 0" column.
data_50.to_csv(file2.replace('v3','v4_50percent'), index=False)