In [None]:
import napari
import scipy as sp
import numpy as np
import tifffile
import scipy.ndimage as ndi
from skimage.feature import peak_local_max
import os
import glob
import cv2
import skimage as ski
import dask
import plotly.express as px
import pandas as pd
import plotly.graph_objs as go


# Setup Notebook

In [2]:
# One napari viewer shared by the whole notebook; the functions below add
# layers to it when they are called with display=True.
viewer = napari.Viewer()

In [None]:
# Choose the channel to find CenpA peaks in
# (used in find_peaks_in_file as the index along axis 1 of the loaded stack).
peak_channel = 1

In [4]:
# Per-axis scale factors for the 3D stacks: passed as `scale` to the napari
# layers and to pair_finder; scale[1:] is used for the 2D projections.
# NOTE(review): presumably the voxel size in (z, y, x) order -- units are not
# stated anywhere in this notebook, confirm against the acquisition metadata.
scale = [0.10, 0.031, 0.031]

# Utility Functions

In [31]:
def backsub(inp, radius=20):
    """Subtract a smooth background estimate from one 2D image.

    The image is blurred with a 5x5 Gaussian, a morphological top-hat of the
    blurred image is taken with an elliptical structuring element, and
    ``blurred - tophat`` is used as the background that is removed from the
    original image.

    Parameters
    ----------
    inp : numpy.ndarray
        2D image in a dtype OpenCV accepts.
    radius : int
        Side length (``ksize``) of the elliptical structuring element.

    Returns
    -------
    numpy.ndarray
        Floating-point image with negative values clipped to 0.
    """
    element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (radius, radius))
    smoothed = cv2.GaussianBlur(inp, (5, 5), 0)
    # smoothed minus its top-hat is what the top-hat considers background.
    background = smoothed - cv2.morphologyEx(smoothed, cv2.MORPH_TOPHAT, element)
    return np.clip(inp.astype(np.single) - background, 0, np.inf)
def backsub_3d(inp, radius=20):
    """Apply ``backsub`` to every 2D plane of an N-dimensional stack.

    The last two axes are treated as the image plane; all leading axes are
    flattened, each plane is processed as a dask delayed task, and the result
    is reshaped back to the input shape.

    Parameters
    ----------
    inp : numpy.ndarray
        Stack whose last two axes are the image plane.
    radius : int
        Forwarded to ``backsub``.

    Returns
    -------
    numpy.ndarray
        Background-subtracted stack with the same shape as ``inp``.
    """
    original_shape = inp.shape
    planes = inp.reshape(-1, original_shape[-2], original_shape[-1])
    tasks = [dask.delayed(backsub)(plane, radius) for plane in planes]
    stacked = np.array(dask.compute(*tasks))
    return stacked.reshape(original_shape)

In [32]:
def filter_label_size(labels, min_size=10, max_size=100):
    """Remove labelled regions whose pixel count is outside [min_size, max_size].

    Parameters
    ----------
    labels : numpy.ndarray
        Non-negative integer label image (0 is background).
    min_size, max_size : int
        Inclusive bounds on the number of pixels a label may have.

    Returns
    -------
    numpy.ndarray
        New label image with the rejected regions set to 0 and the remaining
        labels renumbered sequentially. The input array is left untouched.
    """
    label_sizes = np.bincount(labels.ravel())
    out_of_range = (label_sizes < min_size) | (label_sizes > max_size)
    # Work on a copy so the caller's label image is not silently modified.
    filtered = labels.copy()
    # out_of_range is indexed by label value, so this is a per-pixel lookup.
    filtered[out_of_range[labels]] = 0
    return ski.segmentation.relabel_sequential(filtered)[0]

In [57]:
def find_peaks(img, threshold=0.1, display=False):
    """Detect blob-like peaks in a 3D stack with a Laplacian-of-Gaussian filter.

    Parameters
    ----------
    img : numpy.ndarray
        3D image stack.
    threshold : float
        Passed to ``peak_local_max`` as ``threshold_rel``.
    display : bool
        When True, the filtered image and all peaks are added to the
        module-level napari ``viewer`` using the module-level ``scale``.

    Returns
    -------
    numpy.ndarray
        Peak coordinates as returned by ``peak_local_max``, one row per peak.
    """
    # Halve the sampling along axis 0 (linear interpolation, intensities kept).
    # NOTE(review): the returned coordinates are therefore in the downsampled
    # index space along axis 0, while `scale` is applied unchanged here and in
    # pair_finder -- confirm this is intended.
    downsampled = ski.transform.rescale(img, [0.5, 1, 1], order=1, preserve_range=True)
    # Negated so that bright blobs show up as positive local maxima.
    log_response = -ndi.gaussian_laplace(downsampled / 1000, sigma=[2, 2, 2])
    max_peaks = peak_local_max(log_response, min_distance=1, threshold_rel=threshold)
    if not display:
        return max_peaks
    viewer.add_image(log_response, blending='additive', colormap='magenta', scale=scale)
    viewer.add_points(max_peaks, n_dimensional=True, size=6, scale=scale, name='AllPeaks')
    return max_peaks

def pair_finder(peaks, max_dist=6, scale=(0.12, 0.04, 0.04), max_z_gap=6):
    """Greedily pair each peak with its nearest neighbour.

    Distances are computed after multiplying the first three coordinate
    columns by ``scale``. Peaks are visited from the smallest to the largest
    nearest-neighbour distance; a peak and its nearest neighbour become a pair
    when neither coordinate has been used in an earlier pair and they are
    close enough.

    Parameters
    ----------
    peaks : array-like, shape (n, 3)
        Peak coordinates in index units.
    max_dist : float
        Exclusive upper bound on the scaled distance between paired peaks.
    scale : sequence of 3 floats
        Per-axis multipliers applied before measuring distances.
    max_z_gap : float
        Maximum allowed difference along axis 0, in unscaled index units.

    Returns
    -------
    (list, list)
        ``pairs`` is a flat list ``[a0, b0, a1, b1, ...]`` of coordinate rows,
        so ``pairs[::2]`` and ``pairs[1::2]`` are the two members of each pair.
        ``pair_distances`` holds one scaled distance per pair.
    """
    peaks = np.asarray(peaks)
    if peaks.ndim != 2 or len(peaks) < 2:
        # Nothing can be paired.
        return [], []

    scaled_peaks = peaks.astype(float)
    scaled_peaks[:, :3] *= np.asarray(scale[:3], dtype=float)

    d_matrix = sp.spatial.distance.squareform(sp.spatial.distance.pdist(scaled_peaks))
    # Mask self-distances (and exact duplicates) so they are never the minimum.
    d_matrix[d_matrix == 0] = np.inf

    nearest = np.argmin(d_matrix, axis=0)
    distances = np.min(d_matrix, axis=0)
    # Stable sort: for the tie between the two members of a mutual pair the
    # lower index always comes first, which makes the output deterministic.
    order = np.argsort(distances, kind='stable')

    used = set()  # coordinates that already belong to a pair
    pairs = []
    pair_distances = []
    for idx in order:
        if distances[idx] >= max_dist:
            break  # sorted ascending: every remaining peak is too far away
        first = peaks[idx]
        second = peaks[nearest[idx]]
        first_key = tuple(first.tolist())
        second_key = tuple(second.tolist())
        if first_key in used or second_key in used:
            continue
        if abs(float(first[0]) - float(second[0])) > max_z_gap:
            continue
        used.update((first_key, second_key))
        pairs.extend((first, second))
        pair_distances.append(distances[idx])
    return pairs, pair_distances

def find_peaks_in_file(fname, display=False, threshold=0.16, viewer=None):
    """Load a TIFF stack, detect peaks in ``peak_channel`` and pair them up.

    Parameters
    ----------
    fname : str
        Path of the TIFF file; axis 1 of the loaded array is the channel axis.
    display : bool
        When True, the raw image and the paired peaks are added to ``viewer``.
    threshold : float
        Relative threshold forwarded to ``find_peaks``.
    viewer : napari viewer, optional
        Must be provided when ``display`` is True.

    Returns
    -------
    (list, list)
        The flat pair list and the pair distances returned by ``pair_finder``.
    """
    stack = tifffile.imread(fname)
    channel_stack = stack[:, peak_channel, :, :]
    all_peaks = find_peaks(channel_stack, threshold=threshold, display=display)
    filtered_peaks, distances = pair_finder(all_peaks, max_dist=1.2, scale=scale)

    if display:
        point_options = dict(n_dimensional=True, size=6, scale=scale, name='FilteredPeaks')
        viewer.add_image(stack, channel_axis=1, scale=scale)
        # Even entries are one member of each pair, odd entries the other.
        viewer.add_points(filtered_peaks[::2], face_color='magenta', **point_options)
        viewer.add_points(filtered_peaks[1::2], face_color='yellow', **point_options)
    return filtered_peaks, distances

In [58]:
def get_intensity_df(int_img, labels):
    """Return label, mean_intensity and total_intensity for every labelled region.

    ``total_intensity`` is computed as ``mean_intensity * area``; the ``area``
    column itself is not part of the result.
    """
    table = ski.measure.regionprops_table(
        labels,
        intensity_image=int_img,
        properties=['label', 'mean_intensity', 'area'],
    )
    return (
        pd.DataFrame(table)
        .assign(total_intensity=lambda t: t['mean_intensity'] * t['area'])
        .drop(columns=['area'])
    )

def get_props_df(labels):
    """Return label, centroid and area of every labelled region as a DataFrame."""
    geometry = ski.measure.regionprops_table(labels, properties=['label', 'centroid', 'area'])
    return pd.DataFrame(geometry)

def get_all_props(int_img, labels):
    """Combine geometric and per-channel intensity measurements per label.

    Parameters
    ----------
    int_img : numpy.ndarray
        Intensity image whose first axis indexes the channels; every channel
        must have the same shape as ``labels``.
    labels : numpy.ndarray
        Integer label image.

    Returns
    -------
    pandas.DataFrame
        One row per label with the columns from ``get_props_df`` followed by
        ``mean_intensity``/``total_intensity`` for each channel. The first
        channel's columns carry no suffix (nothing collides on the first
        merge); channel ``i > 0`` gets the suffix ``_c{i}``.
    """
    all_df = get_props_df(labels)
    # Works for any number of channels instead of exactly four.
    for channel_idx, channel_img in enumerate(int_img):
        channel_df = get_intensity_df(channel_img, labels)
        all_df = all_df.merge(channel_df, on='label', suffixes=('', f'_c{channel_idx}'))
    return all_df


In [59]:
def get_mask(peak, img_shape, radius=6):
    """Build a boolean mask of all pixels close to the line joining two points.

    Parameters
    ----------
    peak : sequence of two numpy arrays
        End points of the line; each is reversed before being handed to
        ``cv2.line``.
    img_shape : tuple
        Shape of the 2D mask to create.
    radius : float
        Pixels whose Euclidean distance to the drawn line is below this value
        are set to True.

    Returns
    -------
    numpy.ndarray
        Boolean mask of shape ``img_shape``.
    """
    canvas = np.zeros(img_shape)
    start_pt = tuple(peak[0].astype(int)[::-1])
    end_pt = tuple(peak[1].astype(int)[::-1])
    cv2.line(canvas, start_pt, end_pt, color=255, thickness=2)
    # Distance of every pixel to the nearest drawn (non-zero) pixel.
    distance_to_line = ndi.distance_transform_edt(canvas == 0)
    return distance_to_line < radius

def process_file(fname, viewer=None, display=False, max_distance=20):
    """Measure channel intensities around the two brightest FISH spots of one file.

    Pipeline:
      1. find paired peaks with ``find_peaks_in_file``;
      2. background-subtract the image and segment the summed FISH channel (0);
      3. take the two segmented objects with the highest total intensity
         ("bright" and "dim") and find the peak pair closest to each;
      4. build a mask around each of those two peak pairs and measure every
         channel of the summed projection inside the masks.

    Parameters
    ----------
    fname : str
        Path of the image file.
    viewer : napari viewer, optional
        Required when ``display`` is True.
    display : bool
        When True, intermediate results are added to ``viewer``.
    max_distance : float
        Maximum allowed distance (in pixels of the projection) between a FISH
        spot centroid and the centre of its closest peak pair.

    Returns
    -------
    pandas.DataFrame
        Two rows -- label 1 for the bright spot, label 2 for the dim spot --
        or an empty DataFrame when the file is skipped (a message is printed).

    Raises
    ------
    ValueError
        If ``display`` is True but no ``viewer`` was given.
    """
    if display and viewer is None:
        # Fail early with a clear message instead of an AttributeError later.
        raise ValueError("display=True requires a napari viewer to be passed via `viewer`.")

    # Get peak pairs; the flat list [a0, b0, a1, b1, ...] is regrouped per pair.
    peaks, _ = find_peaks_in_file(fname, display=display, viewer=viewer, threshold=0.16)
    pks_grouped = np.reshape(peaks, (-1, 2, 3))
    centers = np.mean(pks_grouped, axis=1)[:, 1:3]
    if len(centers) < 4:
        print(f"Not enough peaks found in {fname}. Skipping file.")
        return pd.DataFrame()

    img = ski.io.imread(fname)
    img = np.moveaxis(img, -1, 0)
    img = backsub_3d(img, radius=20)

    # Segment the summed FISH channel (0) with a lowered Otsu threshold.
    pimg = img[0].sum(axis=0)
    otsu = 0.25 * ski.filters.threshold_otsu(pimg)
    labels = ski.measure.label(pimg > otsu)
    if np.max(labels) < 2:
        print(f"Not enough FISH labels found in {fname}. Skipping file.")
        return pd.DataFrame()
    labels = filter_label_size(labels, min_size=20, max_size=500)
    df = pd.DataFrame(ski.measure.regionprops_table(labels, intensity_image=pimg, properties=['label', 'area', 'centroid', 'mean_intensity']))
    df['total_intensity'] = df['mean_intensity'] * df['area']
    df = df.sort_values(by='total_intensity', ascending=False).rename(columns={'centroid-0': 'y', 'centroid-1': 'x'})
    if display:
        viewer.add_labels(labels, name='FISH Labels', scale=scale[1:])
    if len(df) < 2:
        print(f"Not enough FISH peaks found in {fname}. Skipping file.")
        return pd.DataFrame()

    # Brightest and second-brightest FISH objects and their distance to every pair centre.
    spot_yx = df[['y', 'x']].to_numpy(dtype=float)
    bright_distances = np.linalg.norm(centers - spot_yx[0], axis=1)
    dim_distances = np.linalg.norm(centers - spot_yx[1], axis=1)

    # Closest peak pair to each FISH spot.
    bright_idx = int(np.argmin(bright_distances))
    dim_idx = int(np.argmin(dim_distances))
    bright_peak = pks_grouped[bright_idx][:, 1:3]
    dim_peak = pks_grouped[dim_idx][:, 1:3]
    bright_peak_distance = bright_distances[bright_idx]
    dim_peak_distance = dim_distances[dim_idx]
    if bright_peak_distance > max_distance or dim_peak_distance > max_distance:
        print(f"Bright peak distance {bright_peak_distance} or dim peak distance {dim_peak_distance} exceeds max distance {max_distance}.")
        return pd.DataFrame()
    if bright_idx == dim_idx:
        # Both spots would be measured in the same region; the two-row result
        # below cannot be built from a single region.
        print(f"Bright and dim FISH spots map to the same peak pair in {fname}. Skipping file.")
        return pd.DataFrame()

    # Mask around the line joining each pair: label 1 = bright, label 2 = dim.
    bright_mask = get_mask([bright_peak[0], bright_peak[1]], pimg.shape)
    dim_mask = get_mask([dim_peak[0], dim_peak[1]], pimg.shape)
    both_labels = bright_mask + dim_mask * 2
    # Pixels covered by both masks would otherwise become a spurious label 3;
    # they are ambiguous, so they are excluded from both regions.
    both_labels[bright_mask & dim_mask] = 0
    if not (both_labels == 1).any() or not (both_labels == 2).any():
        print(f"Bright and dim regions overlap completely in {fname}. Skipping file.")
        return pd.DataFrame()

    # Intensity information for all channels, measured on the summed projection.
    proj_img = img.sum(axis=1)
    prop_df = get_all_props(proj_img, both_labels)

    if display:
        viewer.add_image(proj_img, scale=scale[1:], channel_axis=0)
        viewer.add_labels(both_labels, name='Areas', scale=scale[1:])
        viewer.layers[-1].contour = 2

    # Rows of prop_df are ordered by label (1 = bright, 2 = dim), matching the
    # first two rows of the intensity-sorted FISH table.
    prop_df['FISH_area'] = df['area'].iloc[0:2].values
    prop_df['FISH_total_intensity'] = df['total_intensity'].iloc[0:2].values
    prop_df['FISH_mean_intensity'] = df['mean_intensity'].iloc[0:2].values
    prop_df['distance'] = np.array([bright_peak_distance, dim_peak_distance])
    prop_df['slices'] = img.shape[1]

    return prop_df


# Process Data

In [60]:
# Collect every cropped image. glob returns files in arbitrary order, so sort
# to make the processing order (and everything derived from it) reproducible.
fnames = sorted(glob.glob('*/crop_rep*/*.tif'))
fnames

['Cen11_CENPB_CENPA\\crop_rep0\\Image005-1-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image005-2-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image006-1-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image008-1-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image008-2-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image009-1-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image010-1-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image010-2-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image011-1-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image011-2-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image012-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image012-2-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image012-3-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image013-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image013-2.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image014-1-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image014-2-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image015-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image016-1-1.tif',
 'Cen11_CENPB_CENPA\\crop_rep0\\Image01

In [None]:
# View example to make sure everything works
# (display=True also adds the intermediate images, peaks and masks to the viewer).
process_file('Cen7_CENPB_CENPA/crop_rep0/spread011-1-1.tif', viewer=viewer, display=True)

Unnamed: 0,label,centroid-0,centroid-1,area,mean_intensity,total_intensity,mean_intensity_c1,total_intensity_c1,mean_intensity_c2,total_intensity_c2,mean_intensity_c3,total_intensity_c3,FISH_area,FISH_total_intensity,FISH_mean_intensity,distance,slices
0,1,85.5,230.0,556.0,8719.22168,4847887.0,359.631287,199954.995361,42512.09375,23636720.0,4707.208496,2617208.0,188.0,4851390.0,25805.265625,15.401784,10
1,2,25.693356,14.027257,587.0,3253.669434,1909904.0,302.195923,177389.006714,31383.332031,18422020.0,3916.161865,2298787.0,102.0,1628793.0,15968.558594,16.338032,10


In [62]:
# Process every file and stack the per-file measurements into one table.
# Results are collected in a list and concatenated once: this avoids the
# quadratic cost of growing a DataFrame inside the loop, and skipping the empty
# frames of rejected files keeps the column dtypes of the real measurements.
frames = []
for fname in fnames:
    print(fname)
    prop_df = process_file(fname, viewer=viewer, display=False, max_distance=20)
    if prop_df.empty:
        continue
    prop_df['filename'] = fname
    frames.append(prop_df)
df = pd.concat(frames) if frames else pd.DataFrame(columns=['filename'])


Cen11_CENPB_CENPA\crop_rep0\Image005-1-1.tif
Bright peak distance 30.536214825662608 or dim peak distance 22.480871370881584 exceeds max distance 20.
Cen11_CENPB_CENPA\crop_rep0\Image005-2-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image006-1-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image008-1-1.tif
Bright peak distance 20.133730596947817 or dim peak distance 12.006934745989915 exceeds max distance 20.
Cen11_CENPB_CENPA\crop_rep0\Image008-2-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image009-1-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image010-1-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image010-2-1.tif
Bright peak distance 23.878146824451708 or dim peak distance 10.946911894969078 exceeds max distance 20.
Cen11_CENPB_CENPA\crop_rep0\Image011-1-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image011-2-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image012-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image012-2-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image012-3-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image013-1.tif
Cen11_CENPB_CENPA\crop_rep0\Image013-2.tif
Bright peak dist

# Analyze Data

In [66]:
# process_file builds its label image as bright_mask + dim_mask * 2, so label 1
# is the bright region and label 2 the dim one. Deriving State from the label
# does not depend on the rows strictly alternating bright/dim.
df['State'] = np.where(df['label'] == 1, 'Bright', 'Dim')

In [63]:
# Split on either path separator so the folder/rep columns are also correct
# when file names use '/' (non-Windows glob results, hand-written paths).
path_parts = df['filename'].str.split(r'[\\/]', regex=True)
df['folder'] = path_parts.str[0]
df['rep'] = path_parts.str[1]
# 'mean' as a string avoids the pandas FutureWarning raised for np.mean.
df.groupby(['folder', 'rep']).agg({'label': 'count', 'slices': 'mean'}).reset_index()


The provided callable <function mean at 0x000002B5A67244A0> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "mean" instead.



Unnamed: 0,folder,rep,label,slices
0,Cen11_CENPB_CENPA,crop_rep0,34,21.647059
1,Cen11_KNL1_CENPA,crop_rep1,22,20.272727
2,Cen11_KNL1_CENPA,crop_rep2,22,22.090909
3,Cen11_KNL1_CENPA,crop_rep3,34,24.764706
4,Cen11_NDC80_CENPA,crop_rep1,22,26.090909
5,Cen11_NDC80_CENPA,crop_rep2,18,22.0
6,Cen11_NDC80_CENPA,crop_rep3,24,32.0
7,Cen7_AuroraB_CENPA,crop_rep1,20,35.4
8,Cen7_AuroraB_CENPA,crop_rep2,28,29.785714
9,Cen7_CENPB_CENPA,crop_rep0,16,11.25


In [68]:
# Put the bright and dim measurements of each file side by side.
bright_df = df[df['State'] == 'Bright']
dim_df = df[df['State'] == 'Dim']
combined_df = bright_df.merge(dim_df, on=['filename', 'folder'], suffixes=('_bright', '_dim'))

# Ratio column -> measurement it is computed from (bright / dim).
_ratio_sources = {
    'FISH_ratio': 'FISH_total_intensity',
    'c0_ratio': 'total_intensity',
    'c1_ratio': 'total_intensity_c1',
    'c2_ratio': 'total_intensity_c2',
    'c3_ratio': 'total_intensity_c3',
}
for _ratio, _measurement in _ratio_sources.items():
    combined_df[_ratio] = combined_df[f'{_measurement}_bright'] / combined_df[f'{_measurement}_dim']

# Inverse ratios (dim / bright), added after all direct ratios.
for _ratio in _ratio_sources:
    combined_df[f'{_ratio}_i'] = 1 / combined_df[_ratio]
combined_df

Unnamed: 0,filename,label_bright,centroid-0_bright,centroid-1_bright,area_bright,mean_intensity_bright,total_intensity_bright,mean_intensity_c1_bright,total_intensity_c1_bright,mean_intensity_c2_bright,...,FISH_ratio,c0_ratio,c1_ratio,c2_ratio,c3_ratio,FISH_ratio_i,c0_ratio_i,c1_ratio_i,c2_ratio_i,c3_ratio_i
0,Cen11_CENPB_CENPA\crop_rep0\Image005-2-1.tif,1.0,43.0,25.5,202.0,6678.208008,1.348998e+06,2914.054443,5.886390e+05,10070.732422,...,5.139688,3.599882,0.652633,0.873813,0.345908,0.194564,0.277787,1.532255,1.144410,2.890938
1,Cen11_CENPB_CENPA\crop_rep0\Image006-1-1.tif,1.0,59.0,63.0,259.0,4550.405273,1.178555e+06,3300.525146,8.548360e+05,8664.069336,...,1.464497,1.403802,0.923508,0.838094,1.095782,0.682828,0.712351,1.082828,1.193184,0.912590
2,Cen11_CENPB_CENPA\crop_rep0\Image008-2-1.tif,1.0,54.0,40.5,262.0,3657.500000,9.582650e+05,4565.309082,1.196111e+06,17636.916016,...,1.424610,1.342634,1.002469,0.954634,1.811206,0.701947,0.744804,0.997537,1.047522,0.552118
3,Cen11_CENPB_CENPA\crop_rep0\Image009-1-1.tif,1.0,227.5,47.5,280.0,3336.357178,9.341800e+05,3154.071533,8.831400e+05,10976.286133,...,1.724991,1.577500,1.459948,1.282037,1.264805,0.579713,0.633914,0.684956,0.780009,0.790636
4,Cen11_CENPB_CENPA\crop_rep0\Image010-1-1.tif,1.0,52.0,40.5,466.0,1656.057983,7.717230e+05,3351.866943,1.561970e+06,13776.659180,...,1.087691,1.082527,1.149994,1.543461,1.563911,0.919379,0.923764,0.869570,0.647894,0.639423
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,Cen7_NDC80_CENPA\crop_rep2\rep2_image006-1.tif,1.0,24.5,23.0,310.0,4312.044922,1.336734e+06,443.383881,1.374490e+05,10625.077148,...,1.952146,1.775041,0.621098,0.926863,1.042264,0.512257,0.563367,1.610052,1.078908,0.959450
251,Cen7_NDC80_CENPA\crop_rep2\rep2_image006-2.tif,1.0,65.5,42.0,280.0,2326.782227,6.514990e+05,469.857147,1.315600e+05,18281.410156,...,2.025689,0.851452,0.919537,1.179384,1.568109,0.493659,1.174464,1.087504,0.847900,0.637711
252,Cen7_NDC80_CENPA\crop_rep2\rep2_image008-1.tif,1.0,329.5,79.5,238.0,4083.836182,9.719530e+05,321.663879,7.655600e+04,16343.428711,...,2.234588,1.772059,1.073040,0.944609,0.745072,0.447510,0.564315,0.931932,1.058639,1.342151
253,Cen7_NDC80_CENPA\crop_rep2\rep2_image010-1.tif,1.0,261.5,150.0,258.0,5979.356445,1.542674e+06,413.604645,1.067100e+05,8592.771484,...,1.317193,1.180988,1.252906,0.850141,1.341735,0.759190,0.846749,0.798144,1.176275,0.745303


In [None]:
# Make interactive plot for examining FISH signal ratios

f=go.FigureWidget(
    px.box(combined_df, x='folder', y='FISH_ratio_i', points='all', hover_data=['filename'], range_y=[0,2])
    )

def click_fn(trace, points, state):
    """Re-run the clicked file with display=True in a freshly cleared viewer."""
    if len(points.point_inds) == 0:
        return
    # hover_data=['filename'] stores the file name in customdata column 0.
    # Read it from the clicked trace itself: the global `f` is rebound by the
    # later plotting cells, so it may no longer be this figure at click time.
    idx = trace.customdata[points.point_inds[-1]][0]
    print(idx)
    viewer.layers.clear()
    process_file(idx, viewer=viewer, display=True)

for a in f.data:
    a.on_click(click_fn)
f.write_html('FISH_signal_ratio.html')
f

FigureWidget({
    'data': [{'alignmentgroup': 'True',
              'boxpoints': 'all',
              'customdata': array([['Cen11_CENPB_CENPA\\crop_rep0\\Image005-2-1.tif'],
                                   ['Cen11_CENPB_CENPA\\crop_rep0\\Image006-1-1.tif'],
                                   ['Cen11_CENPB_CENPA\\crop_rep0\\Image008-2-1.tif'],
                                   ...,
                                   ['Cen7_NDC80_CENPA\\crop_rep2\\rep2_image008-1.tif'],
                                   ['Cen7_NDC80_CENPA\\crop_rep2\\rep2_image010-1.tif'],
                                   ['Cen7_NDC80_CENPA\\crop_rep2\\rep2_image015-1.tif']], dtype=object),
              'hovertemplate': 'folder=%{x}<br>FISH_ratio_i=%{y}<br>filename=%{customdata[0]}<extra></extra>',
              'legendgroup': '',
              'marker': {'color': '#636efa'},
              'name': '',
              'notched': False,
              'offsetgroup': '',
              'orientation': 'v',
            

In [None]:
# Make interactive plot for examining CenpA signal ratios

f=go.FigureWidget(
    px.box(combined_df, x='folder', y='c1_ratio_i', points='all', hover_data=['filename'], range_y=[0,2])
    )

def click_fn(trace, points, state):
    """Re-run the clicked file with display=True in a freshly cleared viewer."""
    if len(points.point_inds) == 0:
        return
    # hover_data=['filename'] stores the file name in customdata column 0.
    # Read it from the clicked trace itself: the global `f` is rebound by the
    # other plotting cells, so it may no longer be this figure at click time.
    idx = trace.customdata[points.point_inds[-1]][0]
    print(idx)
    viewer.layers.clear()
    process_file(idx, viewer=viewer, display=True)

for a in f.data:
    a.on_click(click_fn)
f.write_html('cenpA_signal_ratio.html')
f

FigureWidget({
    'data': [{'alignmentgroup': 'True',
              'boxpoints': 'all',
              'customdata': array([['Cen11_CENPB_CENPA\\crop_rep0\\Image005-2-1.tif'],
                                   ['Cen11_CENPB_CENPA\\crop_rep0\\Image006-1-1.tif'],
                                   ['Cen11_CENPB_CENPA\\crop_rep0\\Image008-2-1.tif'],
                                   ...,
                                   ['Cen7_NDC80_CENPA\\crop_rep2\\rep2_image008-1.tif'],
                                   ['Cen7_NDC80_CENPA\\crop_rep2\\rep2_image010-1.tif'],
                                   ['Cen7_NDC80_CENPA\\crop_rep2\\rep2_image015-1.tif']], dtype=object),
              'hovertemplate': 'folder=%{x}<br>c1_ratio_i=%{y}<br>filename=%{customdata[0]}<extra></extra>',
              'legendgroup': '',
              'marker': {'color': '#636efa'},
              'name': '',
              'notched': False,
              'offsetgroup': '',
              'orientation': 'v',
              

In [None]:
# Make interactive plot for examining kinetochore protein signal ratios
f=go.FigureWidget(
    px.box(combined_df, x='folder', y='c2_ratio_i', points='all', hover_data=['filename'], range_y=[0,2], height=800)
    )

def click_fn(trace, points, state):
    """Add the clicked file's layers to the viewer and show only a few of them.

    Existing layers are kept but hidden, so every clicked file stays available
    in the layer list.
    """
    if len(points.point_inds) == 0:
        return
    # hover_data=['filename'] stores the file name in customdata column 0.
    # Read it from the clicked trace itself: the global `f` is rebound by the
    # other plotting cells, so it may no longer be this figure at click time.
    idx = trace.customdata[points.point_inds[-1]][0]
    print(idx)
    process_file(idx, viewer=viewer, display=True)
    for layer in viewer.layers:
        layer.visible = False
    # Positions count back from the layers process_file has just added.
    # NOTE(review): with 4-channel images these appear to be 'Areas' (-1), the
    # projected channels 1 and 0 (-4, -5) and 'FISH Labels' (-6) -- confirm.
    for position in (-1, -4, -5, -6):
        viewer.layers[position].visible = True
    viewer.layers[-1].name = idx

for a in f.data:
    a.on_click(click_fn)
f.write_html('core_protein_signal_ratio.html')
f

FigureWidget({
    'data': [{'alignmentgroup': 'True',
              'boxpoints': 'all',
              'customdata': array([['Cen11_CENPB_CENPA\\crop_rep0\\Image005-2-1.tif'],
                                   ['Cen11_CENPB_CENPA\\crop_rep0\\Image006-1-1.tif'],
                                   ['Cen11_CENPB_CENPA\\crop_rep0\\Image008-2-1.tif'],
                                   ...,
                                   ['Cen7_NDC80_CENPA\\crop_rep2\\rep2_image008-1.tif'],
                                   ['Cen7_NDC80_CENPA\\crop_rep2\\rep2_image010-1.tif'],
                                   ['Cen7_NDC80_CENPA\\crop_rep2\\rep2_image015-1.tif']], dtype=object),
              'hovertemplate': 'folder=%{x}<br>c2_ratio_i=%{y}<br>filename=%{customdata[0]}<extra></extra>',
              'legendgroup': '',
              'marker': {'color': '#636efa'},
              'name': '',
              'notched': False,
              'offsetgroup': '',
              'orientation': 'v',
              

In [72]:
# Rows and distinct files per folder (two rows are expected per file).
df.groupby('folder').agg(label=('label', 'size'), filename=('filename', 'nunique')).reset_index()

Unnamed: 0,folder,label,filename
0,Cen11_CENPB_CENPA,34,17
1,Cen11_KNL1_CENPA,78,39
2,Cen11_NDC80_CENPA,64,32
3,Cen7_AuroraB_CENPA,48,24
4,Cen7_CENPB_CENPA,92,46
5,Cen7_INCENP_CENPA,46,23
6,Cen7_KNL1_CENPA,98,49
7,Cen7_NDC80_CENPA,50,25


In [74]:
# Save the per-file bright/dim table, including the ratio columns, as CSV
# in the current working directory.
combined_df.to_csv('combined_peak_datav2.csv')