In [6]:
import os
from os import listdir
from os.path import isfile, join

import pandas as pd
from itertools import islice
import numpy as np

from skimage.external import tifffile
from skimage.external.tifffile import imread

import matplotlib.pyplot as plt

import ipywidgets as widgets

import random

In [7]:
# --- Run configuration ---

# Percent of nuclear voxels (the brightest ones in channel 0) that the
# segmentation loop labels as heterochromatin: the threshold is taken at the
# (100 - het_percentile)th percentile of the nuclear intensities.
het_percentile = 20

# Per-cell metadata table, read with pandas in the next cell.
cellDataFile = r'S:\CookLab\Liu\20190816_organizedData_MCM_loading\sel_cells_v2.csv'

# Top-level data directory (not referenced by the cells visible in this notebook).
bigDir = r'S:\CookLab\Liu\20190816_organizedData_MCM_loading\DATA'

In [8]:
# Read the per-cell table (one row per cell) and preview its first rows.
cellData = pd.read_csv(cellDataFile)
cellData.head(n=5)

Unnamed: 0.1,Unnamed: 0,file,original cell name,Amy_mod,movie,x,y,ab1 (heterochromatin),ab1 ch,ab2 (MCM),...,nucleus_Li_ch3,hetChrom_gestalt_volume,hetChrom_gestalt_ch1,hetChrom_gestalt_ch2,hetChrom_gestalt_ch3,hetChrom_manual_volume,hetChrom_manual_ch1,hetChrom_manual_ch2,hetChrom_manual_ch3,Slice
0,0,20190625_cell-00.czi,7LE1(B),1130/10,190619-cdc6-dhb-pcna002xy09,172,956,HP1B,1,MCM3,...,10351330702,32475,482212200.0,124077516.0,967836300.0,15154,221987710.0,52151363.0,414308600.0,990
1,1,20190625_cell-01.czi,7L-E1(T),0,190619-cdc6-dhb-pcna002xy09,158,906,HP1B,1,MCM3,...,10572215171,27143,399837000.0,101449362.0,858240500.0,15206,203250046.0,47586257.0,414600000.0,861
2,2,20190625_cell-02.czi,7L-E2(B),0,190619-cdc6-dhb-pcna002xy09,832,252,HP1B,1,MCM3,...,11150685101,68552,1064713000.0,330774925.0,2302990000.0,22387,401103554.0,107710929.0,777141000.0,1275
3,3,20190625_cell-03.czi,7L-E2(T),0,190619-cdc6-dhb-pcna002xy09,888,160,HP1B,1,MCM3,...,10474743866,44734,678486600.0,206492972.0,1375000000.0,10894,197715713.0,53272102.0,350647900.0,820
4,4,20190625_cell-05.czi,7L-G2,0,190619-cdc6-dhb-pcna002xy09,172,324,HP1B,1,MCM3,...,21616688744,170443,2681304000.0,503287911.0,5325134000.0,38295,770965697.0,118607458.0,1218426000.0,946


## Segment

In [None]:
# Heterochromatin segmentation, one pass over every cell (row) in cellData.
#
# Per cell:
#   1. read the image stack at myCell.path (indexed as [slice, channel, :, :])
#      and the matching nucleus mask,
#   2. threshold channel 0 at the (100 - het_percentile)th percentile of the
#      nuclear voxels and discard everything outside the nucleus,
#   3. write the mask (TIFF), a per-slice signal table (CSV) and the per-cell
#      totals (new columns in cellData).
#
# `i` is the positional row to start from; raise it to resume an interrupted
# run. NOTE: islice() counts positions while cellData.loc[i, ...] uses index
# labels, so this assumes cellData has the default 0..n-1 index.
i=0

# Progress Bar
progBar=widgets.IntProgress(
    value=i,
    min=0,
    max=len(cellData),
    step=1,
    description='Progress:',
    orientation='horizontal'
)
display(progBar)

for i,myCell in islice(cellData.iterrows(), i,None):

    # create folder to store this percentile data if it doesn't exist yet
    newSegFolder = os.path.dirname(myCell.path).replace('data_tiff',f'segmentation_{het_percentile}')
    # exist_ok tolerates re-runs; any other failure (missing drive, permissions)
    # surfaces here instead of being swallowed and failing later at imsave
    os.makedirs(newSegFolder, exist_ok=True)

    # open tiff image of a cell
    myImage=imread(myCell.path)
    myChannel = myImage[:, 0, :, :] #1st channel is hetChrom (as stained by HP1B or H3K9Me3)

    # open nucleus mask
    nucleusMaskPath=myCell.path
    nucleusMaskPath=nucleusMaskPath.replace('data_tiff','segmentation_nucleus_Otsu')
    nucleusMaskPath=nucleusMaskPath.replace('.tif','_nucleus.tif')
    nucleusMask=imread(nucleusMaskPath)

    # make nucleus mask binary
    nucleusMask = nucleusMask.astype(bool)

    # intensities of the voxels inside the nucleus only
    myNucleus = myChannel[nucleusMask]

    # find threshold
    myPercentile = np.nanpercentile(myNucleus, (100-het_percentile))

    # threshold
    myHetMask = myChannel >= myPercentile

    # clean from elements outside the nucleus
    myHetMask[~nucleusMask] = False

    # save segmentation mask
    maskFile = os.path.basename(myCell.path).replace('.tif','_hetChrom.tif')
    tifffile.imsave(os.path.join(newSegFolder,maskFile), myHetMask.astype('uint8'))

    # calculate signals (one row per slice)
    sliceSignals = {'HetChrom_Volume': myHetMask.sum(-1).sum(-1)}
    for ch in range(3):
        # Columns are 1-based (ch1..ch3) while the image channel index is
        # 0-based. np.where works on a masked copy, so myImage itself is left
        # intact for any later cell that reuses it.
        maskedChannel = np.where(myHetMask, myImage[:, ch, :, :], 0)
        sliceSignals[f'HetChrom_total_ch{ch+1}'] = maskedChannel.sum(-1).sum(-1)
    sliceSignals['Slice'] = range(myChannel.shape[0])
    df = pd.DataFrame(sliceSignals)

    # save signals
    dfSavePath=myCell.path
    dfSavePath=dfSavePath.replace('data_tiff','results')
    dfSavePath=dfSavePath.replace('.tif',f'_hetChrom_{str((het_percentile))}.csv')
    df.to_csv(dfSavePath)

    # add info to cell info file
    cellData.loc[i,f'hetChrom_{str(het_percentile)}_volume']=np.sum(df.HetChrom_Volume)
    for ch in range(3):
        cellData.loc[i,f'hetChrom_{str(het_percentile)}perc_ch{ch+1}']=np.sum(df[f'HetChrom_total_ch{ch+1}'])

    progBar.value=progBar.value+1

In [None]:
# Debug: rebuild the channel-0 threshold mask (without the nucleus clean-up)
# from the myImage / nucleusMask left over from the last loop iteration, and
# count the voxels it selects.
myChannel = myImage[:, 0, :, :]
myNucleus = myChannel[nucleusMask]
myPercentile = np.nanpercentile(myNucleus, q=(100-het_percentile))
myHetMask = myChannel >= myPercentile
myHetMask.sum()

In [None]:
# Inspect the percentile setting currently in the kernel.
het_percentile

In [None]:
# Number of True voxels in the most recently computed myHetMask.
myHetMask.sum()

In [None]:
# Check the element type of the mask (it is built as a boolean array).
myHetMask.dtype

In [None]:
# Number of True voxels in the nucleus mask left over from the last loop iteration.
nucleusMask.sum()

In [None]:
# Save the updated per-cell table under the NEXT version number so the input
# file is never overwritten. (The previous hard-coded replace('v4','v5') was a
# no-op for any file that is not a "v4" file, which made to_csv clobber the
# very table that was read in.)
cellDataRoot, cellDataExt = os.path.splitext(cellDataFile)
rootHead, versionSep, versionNum = cellDataRoot.rpartition('_v')
if versionSep and versionNum.isdigit():
    # e.g. ..._v2.csv -> ..._v3.csv, ..._v4.csv -> ..._v5.csv
    cellDataOutFile = f'{rootHead}_v{int(versionNum)+1}{cellDataExt}'
else:
    # no recognisable "_v<N>" suffix: still write to a different file
    cellDataOutFile = f'{cellDataRoot}_updated{cellDataExt}'

# index=False: the table is read back without index_col, so writing the index
# would add another "Unnamed: 0" column on every save/load round trip.
cellData.to_csv(cellDataOutFile, index=False)

In [None]:
# Pull the row labelled 257 out of the table for a closer look.
myCell = cellData.loc[257]
myCell

In [None]:
# Fraction of the nuclear volume labelled as heterochromatin for this cell.
# The column name follows het_percentile instead of a hard-coded 20, so the
# check keeps working when the percentile setting is changed.
myCell[f'hetChrom_{het_percentile}_volume'] / myCell['nucleus_Otsu_volume']

In [None]:
# Histogram (100 bins) of the nuclear voxel intensities currently held in
# myNucleus, drawn with the interactive notebook backend.
%matplotlib notebook
h=plt.hist(myNucleus,bins=100)

In [None]:
# Debug probe: threshold one percentile point above the configured one.
# Note this overwrites myPercentile for the cells that follow.
myPercentile = np.nanpercentile(myNucleus, q=(100-het_percentile+1))
myPercentile

In [None]:
# Fraction of nuclear voxels strictly above the current threshold.
(myNucleus > myPercentile).sum() / len(myNucleus)

In [None]:
# Experimental re-run of the percentile segmentation, starting at row 257 and
# reading the images from the Z: drive instead of S:. Results go into
# 'hetChrom_random<percentile>perc_ch<N>' columns of cellData; nothing is
# written to disk (the save steps are commented out).
#
# NOTE(review): the loop stops after the first cell because of the `break` at
# the end - remove it to process every remaining row.
i=257

# Progress Bar
progBar=widgets.IntProgress(
    value=i,
    min=0,
    max=len(cellData),
    step=1,
    description='Progress:',
    orientation='horizontal'
)
display(progBar)

for i,myCell in islice(cellData.iterrows(), i,None):

    # create folder to store this percentile data if it doesn't exist yet
    newSegFolder = os.path.dirname(myCell.path).replace('data_tiff',f'segmentation_random_{het_percentile}')

    # Best effort only: the folder lives under the original (S:) path, which
    # may be unavailable here, and nothing below writes into it.
    try:
        os.makedirs(newSegFolder, exist_ok=True)
    except OSError:
        pass

    # changing the drive setting
    cellPath=myCell.path
    cellPath=cellPath.replace('S:','Z:')
    print(cellPath)
    # open tiff image of a cell
    myImage=imread(cellPath)
    myChannel = myImage[:, 0, :, :] #1st channel is hetChrom (as stained by HP1B or H3K9Me3)

    # open nucleus mask
    nucleusMaskPath=cellPath
    nucleusMaskPath=nucleusMaskPath.replace('data_tiff','segmentation_nucleus_Otsu')
    nucleusMaskPath=nucleusMaskPath.replace('.tif','_nucleus.tif')
    nucleusMask=imread(nucleusMaskPath)

    # make nucleus mask binary
    nucleusMask = nucleusMask.astype(bool)

    # intensities of the voxels inside the nucleus only
    myNucleus = myChannel[nucleusMask]

    # find threshold
    myPercentile = np.nanpercentile(myNucleus, (100-het_percentile))

    #find 20% random sample ...?
    #don't think this makes much sense if we are going to make threshold out of this 'percentile'
    #myPercentile=random.sample(list(myNucleus),k=int(len(myNucleus)*.2))
    #print(myPercentile)

    # threshold
    # NOTE(review): unlike the first segmentation loop, this mask is NOT
    # restricted to the nucleus, so HetChrom_Volume below also counts bright
    # voxels outside it, while the channel signals exclude them - confirm intent.
    myHetMask = myChannel >= myPercentile

    # save segmentation mask
    #maskFile = os.path.basename(myCell.path).replace('.tif',f'_hetChrom.tif')
    #tifffile.imsave(os.path.join(newSegFolder,maskFile), myHetMask.astype('uint8'))

    # voxels that are both above threshold and inside the nucleus
    signalMask = myHetMask.copy()
    signalMask[~nucleusMask] = False

    # calculate signals (one row per slice)
    sliceSignals = {'HetChrom_Volume': myHetMask.sum(-1).sum(-1)}
    for ch in range(3):
        # Columns are 1-based (ch1..ch3) while the image channel index is
        # 0-based. np.where works on a masked copy, so myImage itself is left
        # intact for the inspection cells that follow.
        maskedChannel = np.where(signalMask, myImage[:, ch, :, :], 0)
        sliceSignals[f'HetChrom_total_ch{ch+1}'] = maskedChannel.sum(-1).sum(-1)
    sliceSignals['Slice'] = range(myChannel.shape[0])
    df = pd.DataFrame(sliceSignals)

    # save signals
    #dfSavePath=myCell.path
    #dfSavePath=dfSavePath.replace('data_tiff','results')
    #dfSavePath=dfSavePath.replace('.tif',f'_hetChrom_{str((het_percentile))}.csv')
    #df.to_csv(dfSavePath)

    # add info to cell info file
    # NOTE(review): this volume key has no 'random' tag, so it overwrites the
    # value stored by the first segmentation loop for the same row - confirm.
    cellData.loc[i,f'hetChrom_{str(het_percentile)}_volume']=np.sum(df.HetChrom_Volume)
    for ch in range(3):
        cellData.loc[i,f'hetChrom_random{str(het_percentile)}perc_ch{ch+1}']=np.sum(df[f'HetChrom_total_ch{ch+1}'])

    progBar.value=progBar.value+1
    break

In [None]:
# Inspect the percentile setting (the configured name is het_percentile;
# `hetPercentile` is not defined anywhere in this notebook).
het_percentile

In [None]:
# Show the table row currently held in myCell.
myCell

In [None]:
# Histogram (100 bins) of the nuclear voxel intensities currently held in
# myNucleus, drawn with the interactive notebook backend.
%matplotlib notebook
h=plt.hist(myNucleus,bins=100)

In [None]:
# Show the threshold value currently held in myPercentile.
myPercentile

In [None]:
# How many nuclear voxels sit exactly on the threshold value (ties).
(myNucleus == myPercentile).sum()

In [None]:
# Fraction of nuclear voxels at or above the threshold (ties included).
(myNucleus >= myPercentile).sum() / len(myNucleus)