In [1]:
import numpy as np
import time, os, sys
from urllib.parse import urlparse
import skimage.io
import scipy as sp
import tifffile
from cellpose import plot
import pandas as pd
import glob

from urllib.parse import urlparse
from cellpose import models
import skimage.measure as measure
import skimage.transform as transform
import plotly.graph_objs as go

import cv2
import napari
import scipy.ndimage as ndimage
from roifile import ImagejRoi
from cellpose import models
import glob
import plotly.express as px
from scipy import stats

In [2]:
viewer = napari.Viewer()

# Real Work

In [3]:
wwidth=65
wheight=130


In [4]:
UBF_channel = 3
marker_channel = 2
rDNA_channel = 1
DAPI_channel = 0

In [5]:
def pad_image_to(img, width, height):
    shp = img.shape
    newimg = img.copy()
    
    if (shp[-2]>width):
        offset = int(np.round((shp[-2]-width)/2))
        newimg = newimg[offset:(offset+width),:]
    if (shp[-1]>height):
        offset = int(np.round((shp[-1]-height)/2))
        newimg = newimg[:,offset:(offset+height)]

    shp = newimg.shape
    w = width-shp[-2]
    padl = int(np.round(w/2))
    padr = width-padl-shp[-2]
    h = height-shp[-1]
    padu = int(np.round(h/2))
    padd = height-padu-shp[-1]
    
    newimg = np.pad(newimg, ((padl, padr), (padu, padd)))
    return newimg

In [6]:
def crop_and_rotate(img, mask, angle):
    xs, ys = np.where(mask)
    border = 4
    bounds = np.array([[np.min(xs), np.min(ys)], [np.max(xs), np.max(ys)]])
    min_x = np.max([bounds[0,0]-border, 0])
    min_y = np.max([bounds[0,1]-border, 0])
    max_x = np.min([bounds[1,0]+border, img.shape[1]])
    max_y = np.min([bounds[1,1]+border, img.shape[2]])

    new_img = img[:,min_x:max_x, min_y:max_y]
    new_mask = mask[min_x:max_x, min_y:max_y]
    new_mask = pad_image_to(transform.rotate(new_mask, angle=-angle, resize=True), wheight, wwidth)
    
    
    tmp = new_mask.astype(np.uint8)
    kernel = np.ones((1, 1), np.uint8)
    new_mask = cv2.dilate(tmp, kernel) 
    
    new_img = np.array([pad_image_to(transform.rotate(ab, angle=-angle, resize=True,
                                                      preserve_range=True), wheight, wwidth) for ab in new_img])
    top = np.sum((new_img)[rDNA_channel,0:int(wheight/2),:])/np.sum((new_img>0)[rDNA_channel,0:int(wheight/2),:])
    bottom = np.sum((new_img)[rDNA_channel,-1:-int(wheight/2):-1,:])/np.sum((new_img>0)[rDNA_channel,-1:-int(wheight/2):-1,:])
    if top<bottom:
        new_img = new_img[:,::-1,:]
        new_mask = new_mask[::-1,:]
    return new_img, new_mask
    

In [7]:
def fast_backsub(inp, display=False):
    smoothed = sp.ndimage.gaussian_filter(inp, sigma=4.0)
    min_filter = sp.ndimage.minimum_filter(smoothed, 20.0)
    min_smoothed = sp.ndimage.gaussian_filter(min_filter, sigma=20.0)
    backsub = inp.astype(np.single) - min_smoothed
    
    if (display):
        viewer.add_image(min_filter, blending='additive', colormap='green')
        viewer.add_image(min_smoothed, blending='additive', colormap='green')
        viewer.add_image(backsub, blending='additive', colormap='magenta')
    return backsub

In [8]:
def find_rDNA_masks(rDNA_img, chromosomes, initial_threshold=100, display=False):
    
    backsubbed = fast_backsub(rDNA_img, display=False)    
    backsubbed = backsubbed-0
    backsubbed[backsubbed<0] = 0
    blurred = sp.ndimage.gaussian_filter(backsubbed, sigma=2)
    rDNA = blurred>initial_threshold
    
    labels = ndimage.label(rDNA)[0]
    maxes = sp.ndimage.labeled_comprehension(blurred, labels, np.arange(0,np.max(labels)+1),
                                                  np.max, np.single, 0)
    #print(maxes)
    max_image = maxes[labels]
    threshed_image = blurred>(0.4*max_image)
    threshed_image[labels==0] = 0
    
    if (display):
        viewer.add_image(backsubbed, blending='additive', colormap='green')
        viewer.add_image(blurred, blending='additive', colormap='green')
        viewer.add_labels(labels)
        #viewer.add_image(rDNA, blending='additive', colormap='magenta')
        viewer.add_image(threshed_image, blending='additive', colormap='magenta')
        
        
    return threshed_image

In [9]:
def merge_rDNA_labels(rDNA_labels, chromosome_labels):
    rtn_labels = np.zeros(rDNA_labels.shape)
    for a in range(1,np.max(rDNA_labels)+1):
        sub = chromosome_labels[rDNA_labels==a]
        candidate = stats.mode(sub[sub>0])[0]
        if np.isnan(candidate):
            candidate = 0
        rtn_labels[rDNA_labels==a] = candidate
    return rtn_labels.astype(int)

In [10]:
def ptile(inp):
    return np.percentile(inp, 50)

In [11]:
def cleanup_labels(labels):
    news = labels.copy()
    objs = len(np.unique(labels))-1
    lab_list = np.unique(labels)
    lab_list = lab_list[lab_list!=0]
    for a in np.arange(1,objs+1):
        old_id = lab_list[a-1]
        news[news==old_id] = a
    return news

# Cleanup labels

In [12]:
fnames = glob.glob('./acrocentric_inferred/*.tif')
for f in fnames:
    img = tifffile.imread(f)
    img[-1] = cleanup_labels(img[-1])
    tifffile.imwrite(f, img.astype(np.ushort), imagej=True)

Just testing results

In [13]:
fnames = glob.glob('./acrocentric_inferred/*.tif')
f = fnames[16]

In [14]:
img = tifffile.imread(f)
chromosomes = img[-1].astype(int)
img = img[0:-1]
rDNA = tifffile.imread(f.replace('acrocentric_inferred', 'rDNA_inferred'))[-1].astype(int)

In [15]:
rDNA_merged = merge_rDNA_labels(rDNA, chromosomes)

In [16]:
viewer.layers.clear()
viewer.add_image(img, channel_axis=0, name='img')
viewer.add_labels(chromosomes, name='chromosomes')
viewer.add_labels(rDNA, name='rDNA', visible=False)
viewer.add_labels(rDNA_merged, name='rDNA_merged')

<Labels layer 'rDNA_merged' at 0x2428f2fafb0>

# Find rDNA, UBF, markers and save

In [17]:
df = pd.DataFrame()

for f in glob.glob('./acrocentric_inferred/*.tif'):
#for f in glob.glob('./test/*.tif'):
    print(f)
    img = tifffile.imread(f)
    chromosomes = img[-1].astype(int)
    img = img[0:-1]
    
    rDNA = tifffile.imread(f.replace('acrocentric_inferred', 'rDNA_inferred'))[-1].astype(int)
    rDNA_merged = merge_rDNA_labels(rDNA, chromosomes)

    intdens = []
    rDNA_areas = []
    ubfintdens = []
    medians = sp.ndimage.labeled_comprehension(img[rDNA_channel], chromosomes, np.arange(1,np.max(chromosomes)+1),
                                                  ptile, np.single, 0)
    areas = sp.ndimage.labeled_comprehension(chromosomes, chromosomes, np.arange(1,np.max(chromosomes)+1),
                                                  len, np.single, 0)
    marker_medians = sp.ndimage.labeled_comprehension(img[marker_channel], chromosomes, np.arange(1,np.max(chromosomes)+1),
                                                  np.median, np.single, 0)
    marker_intdens = sp.ndimage.labeled_comprehension(img[marker_channel], chromosomes, np.arange(1,np.max(chromosomes)+1),
                                                  np.mean, np.single, 0)
    labs = sp.ndimage.labeled_comprehension(chromosomes, chromosomes, np.arange(1,np.max(chromosomes)+1),
                                                  np.median, np.single, 0)
    # Removed for Orangutan dataset as the marker stains the ENTIRE chromosome
    #marker_intdens = marker_intdens - marker_medians
    
    #print(medians)
    medians = np.median(medians)
    for a in np.arange(1, np.max(chromosomes)+1):
        pixels_with_rDNA = (rDNA_merged==a)

        #New added as of 20220614
        pixels_in_chromosome_but_not_rDNA = (chromosomes==a) & ~(pixels_with_rDNA)
        background = np.median(img[rDNA_channel][pixels_in_chromosome_but_not_rDNA])
        UBF_background = np.median(img[UBF_channel][pixels_in_chromosome_but_not_rDNA])
        
        #background = medians
        back_subbed = img[rDNA_channel][pixels_with_rDNA] - background
        back_subbed_ubf = img[UBF_channel][pixels_with_rDNA] - UBF_background
        if np.sum(pixels_with_rDNA)>0:
            intdens.append(np.sum(back_subbed))
            ubfintdens.append(np.sum(back_subbed_ubf))
        else:
            intdens.append(0)
            ubfintdens.append(0)
        rDNA_areas.append(np.sum(pixels_with_rDNA))
    intdens = np.array(intdens)
    rDNA_areas = np.array(rDNA_areas)
    ubfintdens = np.array(ubfintdens)
    #tifffile.imwrite('./rDNAMasks/'+f.split('\\')[-1], 
    #                 np.array([img[DAPI_channel], img[rDNA_channel], img[marker_channel], rDNA_mask]).astype(np.single), 
    #                 imagej=True)

    values = np.concatenate([[0], intdens])[chromosomes]

    tdf = pd.DataFrame({'File':f, 'IntDens':intdens/np.sum(intdens), 
                       'rDNA_Areas':rDNA_areas, 
                       'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,
                       'UBF_Mean':ubfintdens, 'UBF_IntDens':ubfintdens/np.sum(ubfintdens), 'labels':labs})

    props = measure.regionprops(chromosomes)
    angles = np.array([p['orientation'] for p in props])
    areas = np.array([p['Area'] for p in props])

    lst = np.arange(1,np.max(chromosomes)+1)

    sorted_chromosomes = lst[np.argsort(-marker_intdens)]
    tdf = tdf.sort_values(by='Marker_Mean', ascending=False)

    nimgs = []
    nmasks = []
    for a in sorted_chromosomes:
        nimg, nmask = crop_and_rotate(img, chromosomes==a, angles[a-1]/np.pi*180)
        nimg = np.concatenate([nimg, [nmask]])
        nimgs.append(nimg)
        nmasks.append(nmask)


    columns = 20
    rows = int(np.ceil(len(nimgs)/columns))

    master_img = np.zeros([5, wheight*rows, wwidth*columns])
    idx = 0
    
    for y in np.arange(0,wheight*rows, wheight):
        for x in np.arange(0,wwidth*columns, wwidth):
            master_img[:,y:(y+wheight), x:(x+wwidth)] = nimgs[idx]
            idx = idx+1
            if idx>=len(nimgs):
                break

    viewer.layers.clear()
    viewer.add_image(master_img, channel_axis=0)
    viewer.layers[1].colormap='red'
    viewer.layers[2].colormap='green'
    viewer.layers[3].colormap='blue'

    tifffile.imwrite('./Karyos/'+f.split('\\')[-1], master_img.astype(np.ushort), imagej=True)
    df = pd.concat([df, tdf])

./acrocentric_inferred\B.Orang_chr13_UBF_1001-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1002-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1003-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1004-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1005-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1006-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1007-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1008-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1009-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1011-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1012-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1013-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1014-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1015-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1016-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1017-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1018-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1019-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1020-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr13_UBF_1021-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1001-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1002-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1003-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1004-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1005-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1006-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1007-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1009-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1010-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1011-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1012-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1013-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1014-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1015-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1016-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1017-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1018-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1019-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1020-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr14_UBF_1021-1.tif
./acrocentric_inferred\B.Orang_chr14_UBF_1022-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1002-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1003-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1004-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1005-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1006-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1007-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1008-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1009-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1010-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1011-1.tif
./acrocentric_inferred\B.Orang_chr15_UBF_1012-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1013-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1014-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1015-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1016-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1017-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1018-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1019-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr15_UBF_1020-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1001-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1003-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1004-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1005-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1006-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1007-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1008-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1009-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1010-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1011-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1012-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1013-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1014-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1015-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1016-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1017-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1018-1.tif
./acrocentric_inferred\B.Orang_chr18_UBF_1019-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr18_UBF_1020-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1001-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1002-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1004-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1005-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1006-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1007-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1008-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1009-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1010-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1011-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1012-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1013-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1014-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1015-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1016-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1017-1.tif
./acrocentric_inferred\B.Orang_chr21_UBF_1018-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr21_UBF_1019-1.tif
./acrocentric_inferred\B.Orang_chr22_UBF_1001-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1002-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1003-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1004-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1005-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1006-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1007-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1008-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1009-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1010-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1011-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1013-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1014-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1015-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1016-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1017-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1018-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1019-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr22_UBF_1020-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1001-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1002-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1003-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1004-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1005-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1006-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1007-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1008-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1009-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1010-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1011-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1012-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1013-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1014-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1015-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1016-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1017-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1018-2.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1018.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1019-1.tif
./acrocentric_inferred\B.Orang_chr2_UBF_1020-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1021-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1022-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1023-1.tif
./acrocentric_inferred\B.Orang_chr2_UBF_1024-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1025-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr2_UBF_1026-1.tif
./acrocentric_inferred\B.Orang_chr9_UBF_1001-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1003-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1004-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1008-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1009-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1012-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1014-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1015-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1016-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1017-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1017-2.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1018-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1019-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1020-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1021-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1022-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1023-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1024-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1025-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1026-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1027-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1028-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1029-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1030-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1031-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1032-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1033-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1034-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


./acrocentric_inferred\B.Orang_chr9_UBF_1035-1.tif


  'Areas':areas, 'Mean':intdens/rDNA_areas, 'Marker_Mean':marker_intdens,


In [18]:
df.pivot_table(index='File', values='Areas', aggfunc=len)

Unnamed: 0_level_0,Areas
File,Unnamed: 1_level_1
./acrocentric_inferred\B.Orang_chr13_UBF_1001-1.tif,18
./acrocentric_inferred\B.Orang_chr13_UBF_1002-1.tif,18
./acrocentric_inferred\B.Orang_chr13_UBF_1003-1.tif,18
./acrocentric_inferred\B.Orang_chr13_UBF_1004-1.tif,15
./acrocentric_inferred\B.Orang_chr13_UBF_1005-1.tif,17
...,...
./acrocentric_inferred\B.Orang_chr9_UBF_1031-1.tif,18
./acrocentric_inferred\B.Orang_chr9_UBF_1032-1.tif,18
./acrocentric_inferred\B.Orang_chr9_UBF_1033-1.tif,18
./acrocentric_inferred\B.Orang_chr9_UBF_1034-1.tif,18


In [19]:
df['File'].str.split('_').str[2]

14    chr13
6     chr13
8     chr13
11    chr13
17    chr13
      ...  
10     chr9
7      chr9
3      chr9
9      chr9
0      chr9
Name: File, Length: 3030, dtype: object

In [20]:
print(df)

                                                 File   IntDens  rDNA_Areas  \
14  ./acrocentric_inferred\B.Orang_chr13_UBF_1001-...  0.074889         129   
6   ./acrocentric_inferred\B.Orang_chr13_UBF_1001-...  0.110767         150   
8   ./acrocentric_inferred\B.Orang_chr13_UBF_1001-...  0.014362          93   
11  ./acrocentric_inferred\B.Orang_chr13_UBF_1001-...  0.000000           0   
17  ./acrocentric_inferred\B.Orang_chr13_UBF_1001-...  0.024165          68   
..                                                ...       ...         ...   
10  ./acrocentric_inferred\B.Orang_chr9_UBF_1035-1...  0.080987         102   
7   ./acrocentric_inferred\B.Orang_chr9_UBF_1035-1...  0.254754         189   
3   ./acrocentric_inferred\B.Orang_chr9_UBF_1035-1...  0.024907          68   
9   ./acrocentric_inferred\B.Orang_chr9_UBF_1035-1...  0.095712         140   
0   ./acrocentric_inferred\B.Orang_chr9_UBF_1035-1...  0.020922          86   

    Areas         Mean  Marker_Mean   UBF_Mean  UBF

In [21]:
df['Chromosome'] = df['File'].str.split('_').str[2]
#df.loc[df['Chromosome']=='rDNA', 'Chromosome'] = df.loc[df['Chromosome']=='rDNA', 'File'].str.split('_').str[4]

In [22]:
cdf = df.pivot_table(index='File', values='Areas', aggfunc=len).reset_index()
cdf = df.groupby(['File', 'Chromosome']).agg({'Areas':len}).reset_index()
#cdf['Strain'] = cdf['File'].str.split('H').str[1].str[1:6]
px.box(cdf, x='Chromosome', y='Areas', points='all', hover_data=['File'])

In [23]:
df.to_csv('Results.csv')