In [1]:
import os
from os import listdir
from os.path import isfile, join

import pandas as pd
from itertools import islice
import numpy as np

from skimage.external import tifffile
from skimage.external.tifffile import imread

import matplotlib.pyplot as plt

import ipywidgets as widgets

import random


from scipy.ndimage.morphology import binary_erosion as br
from skimage import morphology as skmor

from scipy import ndimage
from PIL import Image, ImageDraw

import numpy.ma as ma

from skimage.measure import label, regionprops

## Signals to calculate
Column names as written by `calculate_DAPI_signals` (`nuc` = nucleus, `het` = heterochromatin, `eu` = euchromatin):
1. nuc_DAPI_total
2. outer_DAPI_total
3. inner_DAPI_total
4. nuc_DAPI_het
5. outer_DAPI_het
6. inner_DAPI_het
7. nuc_DAPI_eu
8. outer_DAPI_eu
9. inner_DAPI_eu



# gathering files
Load the existing signals table and drop the leftover index column (`Unnamed: 0`) so the DAPI signals can be recalculated and collected into it.

In [2]:
# Top-level directory from which the dataset paths below are built
bigDir = r"Z:\CookLab\Liu\20190816_organizedData_MCM_loading"

In [3]:
# CSV with the previously computed signals. Every backslash is escaped
# explicitly: a bare "\s" is an invalid escape sequence in a non-raw string.
newfile=f'{bigDir}\\new dataset\\signals_new_v1.csv'

In [4]:
# Load the existing signals table into a DataFrame
newdata = pd.read_csv(filepath_or_buffer=newfile)

In [5]:
# Drop the index column that an earlier to_csv() wrote into the file.
# errors='ignore' keeps this cell re-runnable once the column is already gone.
newdata=newdata.drop(columns=['Unnamed: 0'], errors='ignore')

In [6]:
# Preview the first five rows of the loaded table
newdata.head(n=5)

Unnamed: 0,file,original cell name,movie,last frame movie position x,last frame movie position y,ab1 (MCM),ab1 ch,ab2 (heterochromatin),ab2 ch,DAPI ch,...,outer_vol_het,nuc_mcm_het,inner_mcm_het,outer_mcm_het,nuc_vol_eu,inner_vol_eu,outer_vol_eu,nuc_mcm_eu,inner_mcm_eu,outer_mcm_eu
0,20191203_cell-00.czi,20191125-01-17,191125pcnat_dhbr_cdc6vxy01,678.301,200.931,MCM3,1,HP1beta,2,3,...,23479.0,790208510.0,467272258.0,322936252.0,210619.0,46905.0,163714.0,2265887000.0,726723208.0,1539164000.0
1,20191203_cell-01.czi,20191125-01-20,191125pcnat_dhbr_cdc6vxy01,1552.152,240.479,MCM3,1,HP1beta,2,3,...,27545.0,322439299.0,195401167.0,127038132.0,268802.0,76121.0,192681.0,1265473000.0,383366957.0,882105600.0
2,20191203_cell-02.czi,20191125-01-23,191125pcnat_dhbr_cdc6vxy01,1131.436,318.53,MCM3,1,HP1beta,2,3,...,26820.0,317816716.0,199330897.0,118485819.0,264475.0,74585.0,189890.0,1193524000.0,376574781.0,816949000.0
3,20191203_cell-03.czi,20191125-01-25,191125pcnat_dhbr_cdc6vxy01,1650.585,324.968,MCM3,1,HP1beta,2,3,...,25209.0,410056749.0,187572061.0,222484688.0,179149.0,28511.0,150638.0,1357483000.0,271394033.0,1086089000.0
4,20191203_cell-04.czi,20191125-01-32,191125pcnat_dhbr_cdc6vxy01,1466.687,426.815,MCM3,1,HP1beta,2,3,...,26823.0,760191271.0,469390058.0,290801213.0,253820.0,57341.0,196479.0,2236647000.0,710565425.0,1526082000.0


# DAPI calculations

In [7]:
def calculate_DAPI_signals(data, dapi_channel=2, reader=None, show_progress=True):
    """Sum the DAPI signal under each segmentation mask for every cell in ``data``.

    For every row, the image at ``data['path']`` is read and channel
    ``dapi_channel`` is taken (``image[:, dapi_channel, :, :]``). For each of
    nine masks (nucleus / inner / outer, crossed with total / het / eu) the
    channel values under the mask are summed and written in place to ``data``
    in the columns ``{nuc,inner,outer}_DAPI_{total,het,eu}``.

    Only these nine DAPI columns are written; every other column of ``data``
    is left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one row per cell and a ``path`` column holding the image
        path. The path must contain ``data_tiff``, which is swapped for the
        mask directory name to locate each mask file.
    dapi_channel : int, default 2
        Index along axis 1 of the image that holds the DAPI channel.
    reader : callable, optional
        ``reader(path) -> array``. Defaults to the ``imread`` imported at the
        top of the notebook.
    show_progress : bool, default True
        Display an ``ipywidgets`` progress bar while processing.

    Returns
    -------
    None
        ``data`` is modified in place.

    Raises
    ------
    Exception
        Whatever ``reader`` raises when neither the primary nor the fallback
        path of an image or mask can be read.
    """
    if reader is None:
        reader = imread

    # progress bar
    progBar = None
    if show_progress:
        progBar = widgets.IntProgress(
            value=0,
            min=0,
            max=len(data),
            step=1,
            description='Progress:',
            orientation='horizontal'
        )
        display(progBar)

    ## lookup tables (loop-invariant, so built once)

    ### for column names
    pixels = ['total', 'het', 'eu']
    components = ['nuc', 'inner', 'outer']

    ### for mask names: entry (k*3 + j) belongs to pixels[k] / components[j]
    dirs = ['segmentation_nucleus_Otsu', 'erosion_nuc_mask', 'erosion_nuc_mask',
            'segmentation_20', 'segmentation_20_erosion', 'segmentation_20_erosion',
            'segmentation_20_eu', 'segmentation_20_eu_erosion', 'segmentation_20_eu_erosion']

    files = ['_nucleus.tif', '_eroded_9^5_inner_mask_20.tif', '_eroded_9^5_outer_mask_20.tif',
             '_hetChrom.tif', '_hetChrom_inner.tif', '_hetChrom_outer.tif',
             '_euChrom.tif', '_euChrom_inner.tif', '_euChrom_outer.tif']

    # (output column, mask directory, mask file suffix) for each of the nine signals
    mask_specs = [
        (f'{component}_DAPI_{pixel}', dirs[k * 3 + j], files[k * 3 + j])
        for k, pixel in enumerate(pixels)
        for j, component in enumerate(components)
    ]

    for row_label, cell_path in data['path'].items():
        ## opening cell image; on failure retry with a '.tiff' extension
        try:
            image = reader(cell_path)
        except Exception:
            cell_path = cell_path.replace('.tif', '.tiff')
            image = reader(cell_path)

        ## DAPI channel of this cell (a view; it is never modified below)
        channel = image[:, dapi_channel, :, :]

        ## calculating signals
        totals = {}
        for column, mask_dir, mask_suffix in mask_specs:
            ## opening mask image
            mask_path = cell_path.replace('data_tiff', mask_dir)
            try:
                mask_path = mask_path.replace('.tif', mask_suffix)
                mask = reader(mask_path)
            except Exception:
                # retry after substituting a '_nucleus.tiff' ending, if present
                mask_path = mask_path.replace('_nucleus.tiff', mask_suffix)
                mask = reader(mask_path)

            ## making the mask binary
            mask = mask.astype(bool)

            ## summing only the pixels under the mask; this equals zeroing
            ## everything outside the mask and summing the whole stack, but
            ## needs no copy of the image. A float64 accumulator avoids
            ## integer wrap-around on platforms with a 32-bit default integer.
            totals[column] = channel[mask].sum(dtype=np.float64)

        ## saving signals into original/big dataframe
        for column, total in totals.items():
            data.loc[row_label, column] = total

        if progBar is not None:
            progBar.value = progBar.value + 1

In [8]:
## compute the DAPI signals for the new dataset (adds the *_DAPI_* columns to newdata in place)
calculate_DAPI_signals(data=newdata)

IntProgress(value=0, description='Progress:', max=324)

In [9]:
## sanity check: inner + outer DAPI totals should add up to the whole-nucleus total (ratio close to 1)
inner_plus_outer_total = newdata['inner_DAPI_total'].sum() + newdata['outer_DAPI_total'].sum()
inner_plus_outer_total / newdata['nuc_DAPI_total'].sum()

0.9999999985787311

In [10]:
## same sanity check for the heterochromatin and euchromatin signals (both ratios should be close to 1)
het_ratio = (newdata['inner_DAPI_het'].sum() + newdata['outer_DAPI_het'].sum()) / newdata['nuc_DAPI_het'].sum()
eu_ratio = (newdata['inner_DAPI_eu'].sum() + newdata['outer_DAPI_eu'].sum()) / newdata['nuc_DAPI_eu'].sum()
print(het_ratio, eu_ratio)

1.0000000021541515 0.9999999983596558


In [11]:
## write the updated table next to the input file, as the v2 version of the CSV
outfile = newfile.replace('v1', 'v2')
newdata.to_csv(outfile)

The signals calculated in this notebook are retrieved in the other notebook, '200221_DAPI_calculations'.