In [1]:
import os
from os import listdir
from os.path import isfile, join

import pandas as pd
from itertools import islice
import numpy as np

from skimage.external import tifffile
from skimage.external.tifffile import imread

import matplotlib.pyplot as plt

import ipywidgets as widgets

import random


from scipy.ndimage.morphology import binary_erosion as br
from skimage import morphology as skmor

from scipy import ndimage
from PIL import Image, ImageDraw

import numpy.ma as ma

from skimage.measure import label, regionprops

In [2]:
# Root directory of the data set. This is a hard-coded absolute Windows path
# (drive Z:), so every path derived from it only resolves on a machine where
# that drive is available; change it here to relocate the analysis.
bigDir=r'Z:\CookLab\Liu\20190816_organizedData_MCM_loading\20200829_h3k9me3'

In [3]:
# Path of the per-cell table to load. The separator is a literal backslash,
# so the path is Windows-specific.
file=f'{bigDir}\\200819_cellinfo_v5_20.csv'

In [4]:
# Load the per-cell table. No index_col is given, so an index column that was
# written into the CSV comes back as an ordinary 'Unnamed: N' data column.
data=pd.read_csv(file)

In [5]:
# Preview the first rows (DataFrame.head() shows five by default).
data.head()

Unnamed: 0.1,Unnamed: 0,file,original cell name,movie,x,y,ab1 (MCM),ab1 ch,ab2 (heterochromatin),ab2 ch,...,outer_mcm_eu,nuc_DAPI_total,inner_DAPI_total,outer_DAPI_total,nuc_DAPI_het,inner_DAPI_het,outer_DAPI_het,nuc_DAPI_eu,inner_DAPI_eu,outer_DAPI_eu
0,0,20200819_cell2_01.ciz,20200818xy19_6.czi,200819xy19,1526.899,85.295,H3k9me3,1,MCM2,2,...,412242108.0,1691591000.0,588274600.0,1103316000.0,671761500.0,374096140.0,297665348.0,1019829000.0,214178506.0,805650700.0
1,1,20200819_cell2_02.ciz,20200818xy19_8.czi,200819xy19,1112.568,94.321,H3k9me3,1,MCM2,2,...,182705486.0,1772011000.0,515664900.0,1256346000.0,647514000.0,257151451.0,390362516.0,1124497000.0,258513498.0,865983200.0
2,2,20200819_cell2_03.ciz,20200818xy19_22.czi,200819xy19,1412.269,290.826,H3k9me3,1,MCM2,2,...,325116373.0,1842539000.0,643141100.0,1199398000.0,715250200.0,433930729.0,281319452.0,1127289000.0,209210367.0,918078200.0
3,3,20200819_cell2_04.ciz,20200818xy19_25.czi,200819xy19,1764.52,336.1,H3k9me3,1,MCM2,2,...,182259016.0,1922737000.0,376218700.0,1546518000.0,791833000.0,283312657.0,508520331.0,1130904000.0,92906037.0,1037998000.0
4,4,20200819_cell2_05.ciz,20200818xy19_27.czi,200819xy19,188.411,367.506,H3k9me3,1,MCM2,2,...,244353103.0,3562952000.0,1292840000.0,2270112000.0,1174007000.0,675340998.0,498665524.0,2388946000.0,617498741.0,1771447000.0


# hp1b calculations

In [9]:
def _imread_with_tiff_fallback(path):
    """Read an image stack, retrying with a ``.tiff`` extension on failure.

    Returns a ``(image, path_used)`` tuple so the caller can derive the mask
    paths from the file name that actually opened.
    """
    try:
        return imread(path), path
    except Exception:
        # `Exception` rather than a bare `except:` so that Ctrl-C
        # (KeyboardInterrupt) still stops the long per-cell loop.
        alt_path = path.replace('.tif', '.tiff')
        return imread(alt_path), alt_path


def _read_mask(image_path, mask_dir, mask_suffix):
    """Read the mask stored under ``mask_dir`` that belongs to ``image_path``.

    The mask path is the image path with ``data_tiff`` replaced by
    ``mask_dir`` and the file extension replaced by ``mask_suffix``.
    """
    mask_path = image_path.replace('data_tiff', mask_dir)
    try:
        # First attempt: substitute the suffix for '.tif'. For a '.tiff'
        # image this yields a name that still ends in '.tiff'.
        return imread(mask_path.replace('.tif', mask_suffix))
    except Exception:
        # Fallback: drop the whole extension ('.tif' or '.tiff') and append the
        # suffix, giving a '.tif' mask name for every mask kind.
        stem, _ = os.path.splitext(mask_path)
        return imread(stem + mask_suffix)


def calculate_hp1b_signals(name, data, percentage=20, marker='h3k9me3'):
    """Add masked signal sums and mask volumes for every cell to ``data``.

    For each row of ``data`` the image at ``row.path`` is read and one channel
    is selected. Nine masks are applied: the combinations of nucleus / inner /
    outer with total / heterochromatin / euchromatin. For every mask two
    columns are written back into ``data`` in place:

    * ``{component}_{marker}_{pixels}`` - sum of the channel inside the mask
    * ``{component}_vol_{pixels}``      - number of ``True`` mask pixels

    Parameters
    ----------
    name : str
        ``"new"`` selects channel 1; any other value selects channel 0.
    data : pandas.DataFrame
        One row per cell; must provide ``path`` and ``group`` columns.
        Modified in place.
    percentage : int, optional
        Segmentation threshold used in the mask directory names
        (``segmentation_{percentage}...``). Rows whose ``group`` is ``'CTL'``
        always use 20. Defaults to 20, the value used for controls, so that
        two-argument calls keep working.
    marker : str, optional
        Label embedded in the signal column names. Defaults to ``'h3k9me3'``.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Whatever ``imread`` raises when neither the primary nor the fallback
        path of an image or mask can be read.
    """
    control_percentage = 20  # threshold always applied to 'CTL' rows

    ## if the given dataset is new, the signal channel is 1
    ## else, the signal channel is 0 (for original dataset)
    channel = 1 if name == "new" else 0

    ### for the column names
    pixels = ['total', 'het', 'eu']
    components = ['nuc', 'inner', 'outer']

    ### mask file suffixes, ordered pixels-major / components-minor
    files = ['_nucleus.tif', '_eroded_9^5_inner_mask_20.tif', '_eroded_9^5_outer_mask_20.tif',
             '_hetChrom.tif', '_hetChrom_inner.tif', '_hetChrom_outer.tif',
             '_euChrom.tif', '_euChrom_inner.tif', '_euChrom_outer.tif']

    progBar = widgets.IntProgress(
        value=0,
        min=0,
        max=len(data),
        step=1,
        description='Progress:',
        orientation='horizontal'
    )
    display(progBar)

    for i, myCell in data.iterrows():
        cell_percentage = control_percentage if myCell.group == 'CTL' else percentage

        ## opening cell image
        myImage, image_path = _imread_with_tiff_fallback(myCell.path)
        # assumes axis 1 of the stack is the channel axis - TODO confirm
        myChannel = myImage[:, channel, :, :]

        ### mask directories, same ordering as `files`
        dirs = ['segmentation_nucleus_Otsu', 'erosion_nuc_mask', 'erosion_nuc_mask',
                f'segmentation_{cell_percentage}',
                f'segmentation_{cell_percentage}_erosion',
                f'segmentation_{cell_percentage}_erosion',
                f'segmentation_{cell_percentage}_eu',
                f'segmentation_{cell_percentage}_eu_erosion',
                f'segmentation_{cell_percentage}_eu_erosion']

        signals = {}
        volumes = {}
        for k, pixel in enumerate(pixels):
            for j, component in enumerate(components):
                idx = (k * 3) + j
                mask = _read_mask(image_path, dirs[idx], files[idx]).astype(bool)

                # Summing only the selected pixels equals zeroing everything
                # outside the mask and summing the stack, without copying the
                # image. A float64 accumulator avoids overflow where the
                # platform default integer is 32 bits wide.
                signals[f'{component}_{marker}_{pixel}'] = myChannel[mask].sum(dtype=np.float64)
                volumes[f'{component}_vol_{pixel}'] = mask.sum()

        ## saving the calculated values to the big dataframe: all signal
        ## columns first, then all volume columns
        for column, value in {**signals, **volumes}.items():
            data.loc[i, column] = value

        progBar.value += 1
        

In [11]:
## running the function on the table loaded from 200819_cellinfo_v5_20.csv
# The threshold is passed explicitly (20, matching the `_20` file loaded into
# `data`); without it the three-parameter function raises TypeError.
# NOTE(review): "2020" is not "new", so channel 0 is used - confirm that this
# is the intended channel for this data set.
calculate_hp1b_signals("2020", data, 20)

IntProgress(value=0, description='Progress:', max=146)

In [11]:
# Segmentation thresholds; one `..._v5_{threshold}.csv` table exists per value.
d=(20,30,40,50,60)
for percentage in d:

    file=f'{bigDir}\\200819_cellinfo_v5_{percentage}.csv'
    data=pd.read_csv(file)

    calculate_hp1b_signals("h3k9me3", data, percentage)
    # index=False: writing the RangeIndex makes read_csv return it as an extra
    # 'Unnamed: N' column, and one more is added on every read/write cycle.
    data.to_csv(file.replace('v5','v6'), index=False)
    

IntProgress(value=0, description='Progress:', max=113)

IntProgress(value=0, description='Progress:', max=113)

IntProgress(value=0, description='Progress:', max=113)

IntProgress(value=0, description='Progress:', max=113)

IntProgress(value=0, description='Progress:', max=113)

## calculate geminin data

In [5]:
# NOTE(review): calculate_hp1b_signals is defined with three required
# parameters (name, data, percentage); this call passes only two. Which
# threshold was intended here cannot be determined from this notebook - confirm
# and pass it explicitly.
calculate_hp1b_signals("new",data)

IntProgress(value=0, description='Progress:', max=145)

In [6]:
## sanity check: every ratio below should print as 1.0
# Row 1 - heterochromatin + euchromatin against the total, per region.
het_eu_vs_total = (
    (data['nuc_hp1b_het'] + data['nuc_hp1b_eu']).sum() / data['nuc_hp1b_total'].sum(),
    (data['inner_hp1b_het'].mean() + data['inner_hp1b_eu'].mean()) / data['inner_hp1b_total'].mean(),
    (data['outer_hp1b_het'] + data['outer_hp1b_eu']).sum() / data['outer_hp1b_total'].sum(),
)
print(*het_eu_vs_total)

# Row 2 - inner + outer against the whole nucleus, per pixel class, plus the
# nucleus het + eu check repeated with the sums taken separately.
inner_outer_vs_nuc = (
    (data['inner_hp1b_het'] + data['outer_hp1b_het']).sum() / data['nuc_hp1b_het'].sum(),
    (data['outer_hp1b_eu'].mean() + data['inner_hp1b_eu'].mean()) / data['nuc_hp1b_eu'].mean(),
    (data['nuc_hp1b_het'].sum() + data['nuc_hp1b_eu'].sum()) / data['nuc_hp1b_total'].sum(),
    (data['inner_hp1b_total'] + data['outer_hp1b_total']).sum() / data['nuc_hp1b_total'].sum(),
)
print(*inner_outer_vs_nuc)

1.0 1.0 1.0
1.0 1.0 1.0 1.0


In [7]:
## saving the dataset with calculated signals
out_file = file.replace('v7','v8')
# str.replace is a silent no-op when 'v7' is absent, in which case the output
# path equals the input path and the source CSV would be overwritten.
if out_file == file:
    raise ValueError(f"expected 'v7' in {file!r}; refusing to overwrite the input CSV")
data.to_csv(out_file)