In [25]:
import glob
import itertools
import multiprocessing as mp
import os

import bioframe
import cooler
import cooltools.insulation
import cooltools.lib.plotting
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage.filters import threshold_li, threshold_otsu

In [35]:
# Window sizes handed to the insulation-score calculation; each one becomes a
# column of the results table.
windows = [10000]
# Selects the `/resolutions/<resolution>` group opened inside each .mcool file.
resolution = 1000

cooler_dir = '/groups/goloborodko/projects/lab/baxterLab2020/results/distiller/coolers_library'
# glob returns matches in arbitrary order; sort so the input list is the same
# on every run.
coolers = sorted(glob.glob(os.path.join(cooler_dir, '*.mapq_30.1000.mcool')))

out_path = '/groups/goloborodko/projects/lab/baxterLab2020/results/boundaries/boundaries.csv'

# Starts empty; one row per cooler file is added as results come back.
boundaries_result = pd.DataFrame(columns=windows)

In [36]:
def calc_boundaries(cooler_path, windows):
    """Count thresholded insulation boundaries in one multi-resolution cooler.

    Opens `cooler_path` at the module-level `resolution`, computes insulation
    scores for every entry of `windows`, and runs boundary detection on them.
    For each window, the non-NaN `boundary_strength_<window>` values are
    thresholded with Otsu's method and the boundaries at or above that
    threshold are counted.

    Parameters
    ----------
    cooler_path : str
        Path to the .mcool file.
    windows : list
        Window sizes passed to the insulation-score calculation.

    Returns
    -------
    tuple
        `(sample_name, counts)`, where `sample_name` is the file's basename up
        to the first `'_nextseq'` and `counts` holds one count per window, in
        the order of `windows`.
    """
    uri = cooler_path + '::/resolutions/' + str(resolution)
    clr = cooler.Cooler(uri)

    insulation = cooltools.insulation.calculate_insulation_score(clr, windows)
    boundaries = cooltools.insulation.find_boundaries(insulation)

    def count_strong(window):
        # Rows without a strength value are left out of both the threshold
        # estimate and the count.
        strengths = boundaries[f'boundary_strength_{window}'].dropna()
        cutoff = threshold_otsu(strengths.values)
        return (strengths >= cutoff).sum()

    sample_name = os.path.basename(cooler_path).split('_nextseq')[0]
    return (sample_name, [count_strong(window) for window in windows])

In [37]:
def store_boundaries(result):
    """Pool success callback: record one sample's counts in `boundaries_result`.

    `result` is a `(label, counts)` pair; `counts` is written as the row of the
    module-level `boundaries_result` frame indexed by `label`, adding that row
    if it does not exist yet.
    """
    row_label = result[0]
    row_values = result[1]
    boundaries_result.loc[row_label] = row_values

In [38]:
def error_f(err):
    """Pool error callback: print a failed task's exception with its traceback.

    Printing the exception object alone shows only its message (a `KeyError`,
    for instance, prints just the missing key), which makes failures hard to
    diagnose. The formatted exception includes the exception type and any
    chained cause, which is where multiprocessing attaches the worker-side
    traceback text.

    Parameters
    ----------
    err : BaseException
        The exception raised by the worker task.
    """
    import traceback  # local import keeps this cell self-contained

    formatted = traceback.format_exception(type(err), err, err.__traceback__)
    # Each formatted chunk already ends with a newline.
    print(''.join(formatted), end='')

In [39]:
# Run `calc_boundaries` for every cooler file in worker processes. Successful
# results are written into `boundaries_result` by `store_boundaries`; failures
# are reported by `error_f` and do not stop the remaining files.

# No point starting more workers than there are files; Pool needs at least one.
n_workers = max(1, min(mp.cpu_count(), len(coolers)))

# The context manager tears the workers down even if submitting or waiting is
# interrupted (e.g. by a kernel interrupt).
with mp.Pool(n_workers) as pool:
    for cooler_path in coolers:
        pool.apply_async(calc_boundaries, args=(cooler_path, windows), callback=store_boundaries, error_callback=error_f)

    # Wait for every submitted task before leaving the `with` block, whose
    # exit would otherwise terminate workers that are still busy.
    pool.close()
    pool.join()

In [40]:
# Show the per-sample boundary counts collected by the pool callbacks.
boundaries_result

Unnamed: 0,10000
200828_HiC_2083_cdc45td_noco_2b,322
200129_HiC_1424_cdc20td_ipl1321_brn1v5_cdc20_1b,287
200819_HiC_2144_cdc15td_cdc15_2b,294
201120_HiC_500_smc2tdsmc2K38I_45sphase_1b,245
210118_HiC_2122_top2tdsmc2tdsmc2K381_45sphase_1b,231
201120_HiC_2432_cdc20tdcdc45td_cdc20_1b,272
190617_HiC_ak265_cdc45td_top2td_noco_1b,310
200302_HiC_222_cdc20td_cdc510_brn1v5_cdc20_1b,262
201120_HiC_1807_cdc20tdtop2tdrad51del_cdc20_1b,295
190723_HiC_938_cdc20td_noco_1b,260


# Save the counts table. `to_csv` does not create missing parent directories,
# so make sure the output directory exists before writing.
out_dir = os.path.dirname(out_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
boundaries_result.to_csv(out_path)