In [1]:
import cooler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing as mp
import h5py
import pickle
import seaborn as sns
import tqdm

from scipy import stats

from statsmodels.stats.multitest import multipletests

import sys
import itertools
import time
import datetime

sys.path.insert(0, '/cndd/fangming/CEMBA/snmcseq_dev')
from __init__ import *
from __init__jupyterlab import * 
import snmcseq_utils

from enhancer_gene_hic_validation_utils import *

In [2]:
# Date stamp that tags every result file written by this notebook.
today = datetime.date.today()

# Result-file template; the four placeholders are filled in later with
# .format(int(100*active_th), celltype, resolution, today)
output_res_format = (
    '/sphere/fangming/enhancers/scripts/results_new/'
    'hic_contact_enrichment_metacell_celltypespec{}_{}_{}_{}.pkl'
)
print(today)

2020-12-15


In [3]:
# Chromosomes to scan: chr1..chr22 followed by chrX.
chromosomes = ['chr{}'.format(number) for number in range(1, 23)] + ['chrX']
# Broad cell types; each name is substituted into the per-cell-type .mcool path.
celltypes = [
    'L2_3', 'L4', 'L5', 'L6',
    'Pvalb', 'Sst', 'Vip', 'Ndnf',
]
# Hi-C bin size (bp); selects the /resolutions/<resolution> group of the .mcool.
resolution = 10000

# HiC-analysis - use only cell type-specific pairs

TODO:
- Assign the 900 clusters to the 8 cell types? (maybe not)
- Otherwise, how do we determine the cluster assignment for each linked pair?

In [4]:
# --- lifted-over enhancers (BED columns: chr, start, end, enh_id) ---
min_match = 0.1

f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/liftover/enhs_list_reformat_{:.2f}_lifted.bed'.format(min_match)
enh_list = (
    pd.read_csv(f, sep='\t', header=None, names=['chr', 'start', 'end', 'enh_id'])
    # midpoint of each enhancer interval
    .assign(center=lambda df: (df['start'] + df['end'])/2)
)
print(enh_list.shape)

# --- lifted-over TSS annotation (the TSS file, not the gene-body file) ---
f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/liftover/gencode.vM16.annotation_genes_tss_lifted.bed'
tss_list = (
    pd.read_csv(f, sep='\t', header=None, names=['chr', 'start', 'end', 'gid', 'gene_name'])
    # keep only the part of the gene id before the first '.'
    .assign(gid=lambda df: df['gid'].apply(lambda gid: gid.split('.')[0]))
)
print(tss_list.shape)

# --- enhancer-gene pair tables: "linked" and "correlated" sets, mC and ATAC ---
f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/results_new/fig2_metacell_201213_linked_pairs_mc.tsv'
linked_pairs_mc = pd.read_csv(f, sep="\t")
print(linked_pairs_mc.shape)

f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/results_new/fig2_metacell_201213_linked_pairs_atac.tsv'
linked_pairs_atac = pd.read_csv(f, sep="\t")
print(linked_pairs_atac.shape)

f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/results_new/fig2_metacell_201213_correlated_pairs_mc.tsv'
correlated_pairs_mc = pd.read_csv(f, sep="\t")
print(correlated_pairs_mc.shape)

f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/results_new/fig2_metacell_201213_correlated_pairs_atac.tsv'
correlated_pairs_atac = pd.read_csv(f, sep="\t")
print(correlated_pairs_atac.shape)

# preview one of the pair tables (columns: gene, enh, dist)
linked_pairs_mc.head()

(126134, 5)
(36554, 5)
(12535, 3)
(8351, 3)
(168180, 3)
(166304, 3)


Unnamed: 0,gene,enh,dist
0,ENSMUSG00000000058,181991,80803
1,ENSMUSG00000000058,181993,56051
2,ENSMUSG00000000058,181996,16524
3,ENSMUSG00000000058,181997,14555
4,ENSMUSG00000000058,182001,35561


In [5]:
# Pairs supported by BOTH modalities: inner join of the mC and ATAC tables
# on the full (gene, enh, dist) key.
pair_key_columns = ['gene', 'enh', 'dist']
linked_pairs_both = linked_pairs_mc.merge(linked_pairs_atac, on=pair_key_columns)
correlated_pairs_both = correlated_pairs_mc.merge(correlated_pairs_atac, on=pair_key_columns)
print(linked_pairs_both.shape, correlated_pairs_both.shape)

(5244, 3) (138620, 3)


In [6]:
# Gene expression profiles; the first column of the file becomes the index.
f = '/sphere/fangming/enhancers/scripts/data_organized/gene_rna_clsts.tsv'
gene_rna_clsts = pd.read_csv(f, sep='\t', index_col=0)
print(gene_rna_clsts.shape)

# Cluster annotation: a Series mapping cluster id -> annotation string.
f = '/cndd2/fangming/projects/scf_enhancers/enhancer_ethan38_200520/ref/annotations_order.tsv'
annotation_table = pd.read_csv(f, sep='\t')
clst_annot = annotation_table.set_index('cluster')['annotation']
print(clst_annot.shape)
clst_annot.head()

(12095, 38)
(38,)


cluster
1-1-1       L4/5 IT (1)
1-2-1       L4/5 IT (2)
1-2-2       L4/5 IT (3)
2-1-1    L6 CT Cpa6 (1)
2-1-2    L6 CT Cpa6 (2)
Name: annotation, dtype: object

In [7]:
# Annotation prefixes that are folded into one of the 8 broad cell-type names;
# any prefix not listed here is kept unchanged by rename_annot.
annot_rename_lookup = dict([
    ('L2/3', 'L2_3'),
    ('L4/5', 'L4'),
    ('L6b', 'L6'),
    ('Sncg', 'Ndnf'),
    ('Lamp5', 'Ndnf'),
])

def rename_annot(x, annot_rename_lookup):
    """Return the replacement for ``x`` from ``annot_rename_lookup``.

    Parameters
    ----------
    x : hashable
        Label to translate.
    annot_rename_lookup : dict
        Mapping from old label to new label.

    Returns
    -------
    The mapped label when ``x`` is a key of the lookup, otherwise ``x`` itself.
    """
    return annot_rename_lookup.get(x, x)
    
# Broad label per cluster: take the text before the first space of the
# annotation, then fold it through the rename lookup.
broad_labels = [
    rename_annot(annot.split(' ')[0], annot_rename_lookup)
    for annot in clst_annot.values
]
clst_annot_broad = pd.Series(broad_labels, index=clst_annot.index)

# sanity check: number of clusters, number of broad types, and their names
unique_broad_labels = np.unique(clst_annot_broad.values)
print(clst_annot_broad.shape, len(unique_broad_labels), unique_broad_labels)

(38,) 8 ['L2_3' 'L4' 'L5' 'L6' 'Ndnf' 'Pvalb' 'Sst' 'Vip']


In [8]:
# Collapse the per-cluster RNA profiles to one profile per broad cell type
# (median over the clusters sharing a label), then rank every gene across the
# broad cell types as a fraction in (0, 1].
_tmp = gene_rna_clsts.T.assign(annot=lambda df: clst_annot_broad.loc[df.index])
gene_rna_clsts_broad = _tmp.groupby('annot').median().T
gene_rna_clsts_broad_rank = gene_rna_clsts_broad.rank(axis=1, pct=True)
print(gene_rna_clsts_broad_rank.shape)
gene_rna_clsts_broad_rank.head()

(12095, 8)


annot,L2_3,L4,L5,L6,Ndnf,Pvalb,Sst,Vip
ENSMUSG00000000001,0.5,0.75,0.25,0.625,0.875,0.125,0.375,1.0
ENSMUSG00000000056,0.125,0.75,0.875,0.5,0.625,1.0,0.25,0.375
ENSMUSG00000000058,0.625,1.0,0.25,0.375,0.75,0.125,0.5,0.875
ENSMUSG00000000078,0.25,1.0,0.625,0.75,0.125,0.5,0.875,0.375
ENSMUSG00000000085,0.25,0.125,0.625,0.375,0.875,0.5,0.75,1.0


In [9]:
# A gene counts as "active" in a cell type when its expression rank across the
# broad cell types (gene_rna_clsts_broad_rank) is above this fraction.
active_th = 0.5
# distances: one entry per bin offset, from 1 bin up to distance_cutoff (bp)
distance_cutoff = 1e5
distance_idx = np.arange(1, int(distance_cutoff/resolution)+1)
distances = distance_idx * resolution
n_distances = len(distances)

# Enhancer-gene pair sets ("tracks") profiled for every cell type, in the
# order in which they are stored in the result dictionaries.
pair_tables = collections.OrderedDict([
    ('linked_mc', linked_pairs_mc),
    ('linked_atac', linked_pairs_atac),
    ('linked_both', linked_pairs_both),
    ('correlated_mc', correlated_pairs_mc),
    ('correlated_atac', correlated_pairs_atac),
    ('correlated_both', correlated_pairs_both),
])
track_names = list(pair_tables.keys())

for celltype in celltypes:
    print(celltype)

    # accumulators over all chromosomes: one list of contact values per distance
    contacts = [[] for _ in range(n_distances)]
    contacts_sig_pairs = collections.OrderedDict(
        (track, [[] for _ in range(n_distances)]) for track in track_names
    )

    # Per-cell-type setup, done once rather than once per chromosome:
    # the cooler handle at the requested resolution ...
    resource_string = (
         '/sphere/fangming/enhancers/data_m3c/{}_all_brain.txt_1kb_contacts.mcool'
         '::/resolutions/{}'.format(celltype, resolution)
         )
    c = cooler.Cooler(resource_string)
    # ... and the genes actively expressed in this cell type
    # (rank > active_th across cell types)
    active_genes = gene_rna_clsts_broad_rank[
        gene_rna_clsts_broad_rank[celltype] > active_th
    ].index.values

    for chromosome in chromosomes:
        print(celltype, resolution, chromosome)

        # balanced contact matrix of this chromosome
        mat_all = c.matrix(balance=True).fetch(chromosome)

        # enhancers and active-gene TSSs on this chromosome
        enhs = enh_list[enh_list['chr']==chromosome]
        tsss = tss_list[tss_list['chr']==chromosome]
        tsss = tsss[tsss['gid'].isin(active_genes)]

        # translate every pair table into bin indices (project helper)
        paired_bin_tables = collections.OrderedDict(
            (track, enh_gene_id_to_binidx(pairs, tsss, enhs, resolution))
            for track, pairs in pair_tables.items()
        )

        # contact values per distance bin: background and per-track pairs
        (_contacts, _contacts_sig_pairs
        ) = get_contacts(mat_all, paired_bin_tables, resolution, distance_idx)

        # combine all chromosomes
        for idx in range(n_distances):
            contacts[idx] += list(_contacts[idx])
            for key in _contacts_sig_pairs.keys():
                contacts_sig_pairs[key][idx] += list(_contacts_sig_pairs[key][idx])

    # stats per distance bin
    # NOTE(review): the *_n arrays count every collected value, including any
    # NaN entries that nanmean/nanstd skip -- confirm this is the n wanted
    # downstream.
    contacts_mean = np.array([np.nanmean(val) for val in contacts])
    contacts_std = np.array([np.nanstd(val) for val in contacts])
    contacts_n = np.array([len(val) for val in contacts])

    contacts_sig_pairs_mean = collections.OrderedDict(
        (key, np.array([np.nanmean(val) for val in vals]))
        for key, vals in contacts_sig_pairs.items()
    )
    contacts_sig_pairs_std = collections.OrderedDict(
        (key, np.array([np.nanstd(val) for val in vals]))
        for key, vals in contacts_sig_pairs.items()
    )
    contacts_sig_pairs_n = collections.OrderedDict(
        (key, np.array([len(val) for val in vals]))
        for key, vals in contacts_sig_pairs.items()
    )

    # save results for this cell type
    output_res = output_res_format.format(int(100*active_th), celltype, resolution, today)
    print(output_res)

    # pickled tuple layout:
    #   distances, contacts_mean, contacts_std, contacts_n   -> numpy arrays
    #   contacts_sig_pairs_mean / _std / _n                  -> OrderedDict: track -> numpy array
    with open(output_res, 'wb') as fwh:
        pickle.dump((distances,
                     contacts_mean, contacts_std, contacts_n,
                     contacts_sig_pairs_mean,
                     contacts_sig_pairs_std,
                     contacts_sig_pairs_n,
                    ), fwh)

L2_3
L2_3 10000 chr1


100%|██████████| 10/10 [00:00<00:00, 176.88it/s]


L2_3 10000 chr2


100%|██████████| 10/10 [00:00<00:00, 284.22it/s]


L2_3 10000 chr3


100%|██████████| 10/10 [00:00<00:00, 309.76it/s]


L2_3 10000 chr4


100%|██████████| 10/10 [00:00<00:00, 322.31it/s]


L2_3 10000 chr5


100%|██████████| 10/10 [00:00<00:00, 311.17it/s]


L2_3 10000 chr6


100%|██████████| 10/10 [00:00<00:00, 330.17it/s]


L2_3 10000 chr7


100%|██████████| 10/10 [00:00<00:00, 346.23it/s]


L2_3 10000 chr8


100%|██████████| 10/10 [00:00<00:00, 349.52it/s]


L2_3 10000 chr9


100%|██████████| 10/10 [00:00<00:00, 361.47it/s]


L2_3 10000 chr10


100%|██████████| 10/10 [00:00<00:00, 345.92it/s]


L2_3 10000 chr11


100%|██████████| 10/10 [00:00<00:00, 337.33it/s]


L2_3 10000 chr12


100%|██████████| 10/10 [00:00<00:00, 333.64it/s]


L2_3 10000 chr13


100%|██████████| 10/10 [00:00<00:00, 394.39it/s]


L2_3 10000 chr14


100%|██████████| 10/10 [00:00<00:00, 382.52it/s]


L2_3 10000 chr15


100%|██████████| 10/10 [00:00<00:00, 370.08it/s]


L2_3 10000 chr16


100%|██████████| 10/10 [00:00<00:00, 401.39it/s]


L2_3 10000 chr17


100%|██████████| 10/10 [00:00<00:00, 362.68it/s]


L2_3 10000 chr18


100%|██████████| 10/10 [00:00<00:00, 416.87it/s]


L2_3 10000 chr19


100%|██████████| 10/10 [00:00<00:00, 519.82it/s]


L2_3 10000 chr20


100%|██████████| 10/10 [00:00<00:00, 412.35it/s]


L2_3 10000 chr21


100%|██████████| 10/10 [00:00<00:00, 645.89it/s]


L2_3 10000 chr22


100%|██████████| 10/10 [00:00<00:00, 417.81it/s]


L2_3 10000 chrX


100%|██████████| 10/10 [00:00<00:00, 419.89it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_celltypespec50_L2_3_10000_2020-12-15.pkl
L4
L4 10000 chr1


100%|██████████| 10/10 [00:00<00:00, 269.87it/s]


L4 10000 chr2


100%|██████████| 10/10 [00:00<00:00, 294.49it/s]


L4 10000 chr3


100%|██████████| 10/10 [00:00<00:00, 307.77it/s]


L4 10000 chr4


100%|██████████| 10/10 [00:00<00:00, 325.33it/s]


L4 10000 chr5


100%|██████████| 10/10 [00:00<00:00, 312.71it/s]


L4 10000 chr6


100%|██████████| 10/10 [00:00<00:00, 331.46it/s]


L4 10000 chr7


100%|██████████| 10/10 [00:00<00:00, 349.28it/s]


L4 10000 chr8


100%|██████████| 10/10 [00:00<00:00, 362.61it/s]


L4 10000 chr9


100%|██████████| 10/10 [00:00<00:00, 366.57it/s]


L4 10000 chr10


100%|██████████| 10/10 [00:00<00:00, 346.12it/s]


L4 10000 chr11


100%|██████████| 10/10 [00:00<00:00, 337.26it/s]


L4 10000 chr12


100%|██████████| 10/10 [00:00<00:00, 333.96it/s]


L4 10000 chr13


100%|██████████| 10/10 [00:00<00:00, 403.44it/s]


L4 10000 chr14


100%|██████████| 10/10 [00:00<00:00, 397.63it/s]


L4 10000 chr15


100%|██████████| 10/10 [00:00<00:00, 364.54it/s]


L4 10000 chr16


100%|██████████| 10/10 [00:00<00:00, 393.13it/s]


L4 10000 chr17


100%|██████████| 10/10 [00:00<00:00, 372.62it/s]


L4 10000 chr18


100%|██████████| 10/10 [00:00<00:00, 402.70it/s]


L4 10000 chr19


100%|██████████| 10/10 [00:00<00:00, 553.88it/s]


L4 10000 chr20


100%|██████████| 10/10 [00:00<00:00, 400.66it/s]


L4 10000 chr21


100%|██████████| 10/10 [00:00<00:00, 575.60it/s]


L4 10000 chr22


100%|██████████| 10/10 [00:00<00:00, 417.34it/s]


L4 10000 chrX


100%|██████████| 10/10 [00:00<00:00, 462.34it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_celltypespec50_L4_10000_2020-12-15.pkl
L5
L5 10000 chr1


100%|██████████| 10/10 [00:00<00:00, 260.95it/s]


L5 10000 chr2


100%|██████████| 10/10 [00:00<00:00, 289.49it/s]


L5 10000 chr3


100%|██████████| 10/10 [00:00<00:00, 313.11it/s]


L5 10000 chr4


100%|██████████| 10/10 [00:00<00:00, 331.12it/s]


L5 10000 chr5


100%|██████████| 10/10 [00:00<00:00, 312.73it/s]


L5 10000 chr6


100%|██████████| 10/10 [00:00<00:00, 343.60it/s]


L5 10000 chr7


100%|██████████| 10/10 [00:00<00:00, 336.94it/s]


L5 10000 chr8


100%|██████████| 10/10 [00:00<00:00, 361.62it/s]


L5 10000 chr9


100%|██████████| 10/10 [00:00<00:00, 352.37it/s]


L5 10000 chr10


100%|██████████| 10/10 [00:00<00:00, 353.77it/s]


L5 10000 chr11


100%|██████████| 10/10 [00:00<00:00, 327.79it/s]


L5 10000 chr12


100%|██████████| 10/10 [00:00<00:00, 337.71it/s]


L5 10000 chr13


100%|██████████| 10/10 [00:00<00:00, 414.64it/s]


L5 10000 chr14


100%|██████████| 10/10 [00:00<00:00, 379.23it/s]


L5 10000 chr15


100%|██████████| 10/10 [00:00<00:00, 379.17it/s]


L5 10000 chr16


100%|██████████| 10/10 [00:00<00:00, 388.03it/s]


L5 10000 chr17


100%|██████████| 10/10 [00:00<00:00, 347.67it/s]


L5 10000 chr18


100%|██████████| 10/10 [00:00<00:00, 422.07it/s]


L5 10000 chr19


100%|██████████| 10/10 [00:00<00:00, 532.54it/s]


L5 10000 chr20


100%|██████████| 10/10 [00:00<00:00, 409.62it/s]


L5 10000 chr21


100%|██████████| 10/10 [00:00<00:00, 567.33it/s]


L5 10000 chr22


100%|██████████| 10/10 [00:00<00:00, 470.89it/s]


L5 10000 chrX


100%|██████████| 10/10 [00:00<00:00, 444.29it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_celltypespec50_L5_10000_2020-12-15.pkl
L6
L6 10000 chr1


100%|██████████| 10/10 [00:00<00:00, 274.20it/s]


L6 10000 chr2


100%|██████████| 10/10 [00:00<00:00, 285.44it/s]


L6 10000 chr3


100%|██████████| 10/10 [00:00<00:00, 313.92it/s]


L6 10000 chr4


100%|██████████| 10/10 [00:00<00:00, 333.70it/s]


L6 10000 chr5


100%|██████████| 10/10 [00:00<00:00, 307.38it/s]


L6 10000 chr6


100%|██████████| 10/10 [00:00<00:00, 340.32it/s]


L6 10000 chr7


100%|██████████| 10/10 [00:00<00:00, 352.83it/s]


L6 10000 chr8


100%|██████████| 10/10 [00:00<00:00, 344.28it/s]


L6 10000 chr9


100%|██████████| 10/10 [00:00<00:00, 352.39it/s]


L6 10000 chr10


100%|██████████| 10/10 [00:00<00:00, 356.88it/s]


L6 10000 chr11


100%|██████████| 10/10 [00:00<00:00, 339.55it/s]


L6 10000 chr12


100%|██████████| 10/10 [00:00<00:00, 335.10it/s]


L6 10000 chr13


100%|██████████| 10/10 [00:00<00:00, 454.19it/s]


L6 10000 chr14


100%|██████████| 10/10 [00:00<00:00, 367.68it/s]


L6 10000 chr15


100%|██████████| 10/10 [00:00<00:00, 355.75it/s]


L6 10000 chr16


100%|██████████| 10/10 [00:00<00:00, 395.00it/s]


L6 10000 chr17


100%|██████████| 10/10 [00:00<00:00, 365.27it/s]


L6 10000 chr18


100%|██████████| 10/10 [00:00<00:00, 430.61it/s]


L6 10000 chr19


100%|██████████| 10/10 [00:00<00:00, 560.66it/s]


L6 10000 chr20


100%|██████████| 10/10 [00:00<00:00, 430.54it/s]


L6 10000 chr21


100%|██████████| 10/10 [00:00<00:00, 562.40it/s]


L6 10000 chr22


100%|██████████| 10/10 [00:00<00:00, 458.12it/s]


L6 10000 chrX


100%|██████████| 10/10 [00:00<00:00, 416.51it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_celltypespec50_L6_10000_2020-12-15.pkl
Pvalb
Pvalb 10000 chr1


100%|██████████| 10/10 [00:00<00:00, 268.42it/s]


Pvalb 10000 chr2


100%|██████████| 10/10 [00:00<00:00, 299.13it/s]


Pvalb 10000 chr3


100%|██████████| 10/10 [00:00<00:00, 310.33it/s]


Pvalb 10000 chr4


100%|██████████| 10/10 [00:00<00:00, 328.09it/s]


Pvalb 10000 chr5


100%|██████████| 10/10 [00:00<00:00, 319.01it/s]


Pvalb 10000 chr6


100%|██████████| 10/10 [00:00<00:00, 347.73it/s]


Pvalb 10000 chr7


100%|██████████| 10/10 [00:00<00:00, 354.46it/s]


Pvalb 10000 chr8


100%|██████████| 10/10 [00:00<00:00, 378.94it/s]


Pvalb 10000 chr9


100%|██████████| 10/10 [00:00<00:00, 357.63it/s]


Pvalb 10000 chr10


100%|██████████| 10/10 [00:00<00:00, 361.58it/s]


Pvalb 10000 chr11


100%|██████████| 10/10 [00:00<00:00, 349.63it/s]


Pvalb 10000 chr12


100%|██████████| 10/10 [00:00<00:00, 333.43it/s]


Pvalb 10000 chr13


100%|██████████| 10/10 [00:00<00:00, 429.54it/s]


Pvalb 10000 chr14


100%|██████████| 10/10 [00:00<00:00, 456.71it/s]


Pvalb 10000 chr15


100%|██████████| 10/10 [00:00<00:00, 401.55it/s]


Pvalb 10000 chr16


100%|██████████| 10/10 [00:00<00:00, 406.42it/s]


Pvalb 10000 chr17


100%|██████████| 10/10 [00:00<00:00, 359.58it/s]


Pvalb 10000 chr18


100%|██████████| 10/10 [00:00<00:00, 438.63it/s]


Pvalb 10000 chr19


100%|██████████| 10/10 [00:00<00:00, 592.37it/s]


Pvalb 10000 chr20


100%|██████████| 10/10 [00:00<00:00, 436.07it/s]


Pvalb 10000 chr21


100%|██████████| 10/10 [00:00<00:00, 659.98it/s]


Pvalb 10000 chr22


100%|██████████| 10/10 [00:00<00:00, 449.44it/s]


Pvalb 10000 chrX


100%|██████████| 10/10 [00:00<00:00, 533.94it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_celltypespec50_Pvalb_10000_2020-12-15.pkl
Sst
Sst 10000 chr1


100%|██████████| 10/10 [00:00<00:00, 274.97it/s]


Sst 10000 chr2


100%|██████████| 10/10 [00:00<00:00, 292.54it/s]


Sst 10000 chr3


100%|██████████| 10/10 [00:00<00:00, 312.71it/s]


Sst 10000 chr4


100%|██████████| 10/10 [00:00<00:00, 360.04it/s]


Sst 10000 chr5


100%|██████████| 10/10 [00:00<00:00, 322.57it/s]


Sst 10000 chr6


100%|██████████| 10/10 [00:00<00:00, 358.45it/s]


Sst 10000 chr7


100%|██████████| 10/10 [00:00<00:00, 373.70it/s]


Sst 10000 chr8


100%|██████████| 10/10 [00:00<00:00, 382.42it/s]


Sst 10000 chr9


100%|██████████| 10/10 [00:00<00:00, 356.21it/s]


Sst 10000 chr10


100%|██████████| 10/10 [00:00<00:00, 398.94it/s]


Sst 10000 chr11


100%|██████████| 10/10 [00:00<00:00, 351.98it/s]


Sst 10000 chr12


100%|██████████| 10/10 [00:00<00:00, 351.65it/s]


Sst 10000 chr13


100%|██████████| 10/10 [00:00<00:00, 473.33it/s]


Sst 10000 chr14


100%|██████████| 10/10 [00:00<00:00, 363.91it/s]


Sst 10000 chr15


100%|██████████| 10/10 [00:00<00:00, 448.36it/s]


Sst 10000 chr16


100%|██████████| 10/10 [00:00<00:00, 401.80it/s]


Sst 10000 chr17


100%|██████████| 10/10 [00:00<00:00, 408.60it/s]


Sst 10000 chr18


100%|██████████| 10/10 [00:00<00:00, 426.29it/s]


Sst 10000 chr19


100%|██████████| 10/10 [00:00<00:00, 569.64it/s]


Sst 10000 chr20


100%|██████████| 10/10 [00:00<00:00, 444.52it/s]


Sst 10000 chr21


100%|██████████| 10/10 [00:00<00:00, 690.15it/s]


Sst 10000 chr22


100%|██████████| 10/10 [00:00<00:00, 495.25it/s]


Sst 10000 chrX


100%|██████████| 10/10 [00:00<00:00, 453.90it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_celltypespec50_Sst_10000_2020-12-15.pkl
Vip
Vip 10000 chr1


100%|██████████| 10/10 [00:00<00:00, 283.22it/s]


Vip 10000 chr2


100%|██████████| 10/10 [00:00<00:00, 294.05it/s]


Vip 10000 chr3


100%|██████████| 10/10 [00:00<00:00, 324.45it/s]


Vip 10000 chr4


100%|██████████| 10/10 [00:00<00:00, 338.33it/s]


Vip 10000 chr5


100%|██████████| 10/10 [00:00<00:00, 330.38it/s]


Vip 10000 chr6


100%|██████████| 10/10 [00:00<00:00, 339.67it/s]


Vip 10000 chr7


100%|██████████| 10/10 [00:00<00:00, 347.71it/s]


Vip 10000 chr8


100%|██████████| 10/10 [00:00<00:00, 390.67it/s]


Vip 10000 chr9


100%|██████████| 10/10 [00:00<00:00, 382.08it/s]


Vip 10000 chr10


100%|██████████| 10/10 [00:00<00:00, 375.48it/s]


Vip 10000 chr11


100%|██████████| 10/10 [00:00<00:00, 364.80it/s]


Vip 10000 chr12


100%|██████████| 10/10 [00:00<00:00, 370.43it/s]


Vip 10000 chr13


100%|██████████| 10/10 [00:00<00:00, 426.98it/s]


Vip 10000 chr14


100%|██████████| 10/10 [00:00<00:00, 484.52it/s]


Vip 10000 chr15


100%|██████████| 10/10 [00:00<00:00, 412.78it/s]


Vip 10000 chr16


100%|██████████| 10/10 [00:00<00:00, 399.09it/s]


Vip 10000 chr17


100%|██████████| 10/10 [00:00<00:00, 386.91it/s]


Vip 10000 chr18


100%|██████████| 10/10 [00:00<00:00, 461.44it/s]


Vip 10000 chr19


100%|██████████| 10/10 [00:00<00:00, 620.46it/s]


Vip 10000 chr20


100%|██████████| 10/10 [00:00<00:00, 467.37it/s]


Vip 10000 chr21


100%|██████████| 10/10 [00:00<00:00, 600.02it/s]


Vip 10000 chr22


100%|██████████| 10/10 [00:00<00:00, 466.51it/s]


Vip 10000 chrX


100%|██████████| 10/10 [00:00<00:00, 481.55it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_celltypespec50_Vip_10000_2020-12-15.pkl
Ndnf
Ndnf 10000 chr1


100%|██████████| 10/10 [00:00<00:00, 280.49it/s]


Ndnf 10000 chr2


100%|██████████| 10/10 [00:00<00:00, 295.09it/s]


Ndnf 10000 chr3


100%|██████████| 10/10 [00:00<00:00, 315.74it/s]


Ndnf 10000 chr4


100%|██████████| 10/10 [00:00<00:00, 362.18it/s]


Ndnf 10000 chr5


100%|██████████| 10/10 [00:00<00:00, 314.83it/s]


Ndnf 10000 chr6


100%|██████████| 10/10 [00:00<00:00, 349.50it/s]


Ndnf 10000 chr7


100%|██████████| 10/10 [00:00<00:00, 362.52it/s]


Ndnf 10000 chr8


100%|██████████| 10/10 [00:00<00:00, 163.17it/s]


Ndnf 10000 chr9


100%|██████████| 10/10 [00:00<00:00, 348.09it/s]


Ndnf 10000 chr10


100%|██████████| 10/10 [00:00<00:00, 357.33it/s]


Ndnf 10000 chr11


100%|██████████| 10/10 [00:00<00:00, 151.16it/s]


Ndnf 10000 chr12


100%|██████████| 10/10 [00:00<00:00, 343.29it/s]


Ndnf 10000 chr13


100%|██████████| 10/10 [00:00<00:00, 438.48it/s]


Ndnf 10000 chr14


100%|██████████| 10/10 [00:00<00:00, 374.76it/s]


Ndnf 10000 chr15


100%|██████████| 10/10 [00:00<00:00, 402.79it/s]


Ndnf 10000 chr16


100%|██████████| 10/10 [00:00<00:00, 435.58it/s]


Ndnf 10000 chr17


100%|██████████| 10/10 [00:00<00:00, 368.02it/s]


Ndnf 10000 chr18


100%|██████████| 10/10 [00:00<00:00, 411.45it/s]


Ndnf 10000 chr19


100%|██████████| 10/10 [00:00<00:00, 666.37it/s]


Ndnf 10000 chr20


100%|██████████| 10/10 [00:00<00:00, 186.52it/s]


Ndnf 10000 chr21


100%|██████████| 10/10 [00:00<00:00, 719.89it/s]


Ndnf 10000 chr22


100%|██████████| 10/10 [00:00<00:00, 436.39it/s]


Ndnf 10000 chrX


100%|██████████| 10/10 [00:00<00:00, 521.11it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_celltypespec50_Ndnf_10000_2020-12-15.pkl


# Rerun everything repeatedly over different liftOver thresholds

In [1]:
import cooler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing as mp
import h5py
import pickle
import seaborn as sns
import tqdm

from scipy import stats

from statsmodels.stats.multitest import multipletests

import sys
import itertools
import time
import datetime

sys.path.insert(0, '/cndd/fangming/CEMBA/snmcseq_dev')
from __init__ import *
from __init__jupyterlab import * 
import snmcseq_utils

from enhancer_gene_hic_validation_utils import *

In [2]:
# Date stamp that tags the result files written by this notebook.
today = datetime.date.today()
print(today)

2020-12-17


In [3]:
# Chromosomes to scan: chr1..chr22 followed by chrX.
chromosomes = ['chr{}'.format(number) for number in range(1, 23)] + ['chrX']
# Broad cell types analysed in this notebook.
celltypes = [
    'L2_3', 'L4', 'L5', 'L6',
    'Pvalb', 'Sst', 'Vip', 'Ndnf',
]
# Hi-C bin size (bp).
resolution = 10000

In [4]:
# Gene expression profiles; the first column of the file becomes the index.
f = '/sphere/fangming/enhancers/scripts/data_organized/gene_rna_clsts.tsv'
gene_rna_clsts = pd.read_csv(f, sep='\t', index_col=0)

# Cluster annotation: a Series mapping cluster id -> annotation string.
f = '/cndd2/fangming/projects/scf_enhancers/enhancer_ethan38_200520/ref/annotations_order.tsv'
annotation_table = pd.read_csv(f, sep='\t')
clst_annot = annotation_table.set_index('cluster')['annotation']

# Annotation prefixes that are folded into one of the 8 broad cell-type names;
# any prefix not listed here is kept unchanged by rename_annot.
annot_rename_lookup = dict([
    ('L2/3', 'L2_3'),
    ('L4/5', 'L4'),
    ('L6b', 'L6'),
    ('Sncg', 'Ndnf'),
    ('Lamp5', 'Ndnf'),
])

def rename_annot(x, annot_rename_lookup):
    """Translate an annotation label through a rename table.

    Args:
        x: the label to translate.
        annot_rename_lookup: dict mapping labels to their replacement.

    Returns:
        The replacement stored under `x` when `x` is a key of
        `annot_rename_lookup`; otherwise `x` itself, unchanged.
    """
    return annot_rename_lookup.get(x, x)
    
# Broad label per cluster: first space-delimited word of the annotation,
# then passed through the rename table.
clst_annot_broad = pd.Series(
    [rename_annot(annot.split(' ')[0], annot_rename_lookup)
     for annot in clst_annot.values],
    index=clst_annot.index,
)

# Sanity check: shape, number of distinct broad labels, and the labels themselves.
_broad_levels = np.unique(clst_annot_broad.values)
print(clst_annot_broad.shape, len(_broad_levels), _broad_levels)

(38,) 8 ['L2_3' 'L4' 'L5' 'L6' 'Ndnf' 'Pvalb' 'Sst' 'Vip']


In [5]:
# merge RNA profiles
_tmp = gene_rna_clsts.T
_tmp['annot'] = clst_annot_broad.loc[_tmp.index]
gene_rna_clsts_broad = _tmp.groupby('annot').median().T
gene_rna_clsts_broad_rank = gene_rna_clsts_broad.rank(pct=True, axis=1)
print(gene_rna_clsts_broad_rank.shape)
gene_rna_clsts_broad_rank.head()

(12095, 8)


annot,L2_3,L4,L5,L6,Ndnf,Pvalb,Sst,Vip
ENSMUSG00000000001,0.5,0.75,0.25,0.625,0.875,0.125,0.375,1.0
ENSMUSG00000000056,0.125,0.75,0.875,0.5,0.625,1.0,0.25,0.375
ENSMUSG00000000058,0.625,1.0,0.25,0.375,0.75,0.125,0.5,0.875
ENSMUSG00000000078,0.25,1.0,0.625,0.75,0.125,0.5,0.875,0.375
ENSMUSG00000000085,0.25,0.125,0.625,0.375,0.875,0.5,0.75,1.0


In [6]:
# Thresholds to process; each value is formatted into the name of the lifted
# enhancer BED file to load and into the output file names.
# Full sweep (disabled): np.arange(0.1, 1.00+0.05, 0.05)
min_matches = [0.8]
print(min_matches)

[0.8]


In [7]:
# Hi-C contact statistics of enhancer-gene pairs, per liftOver threshold and cell type.
#
# For every threshold in `min_matches` and every cell type in `celltypes`, the
# per-chromosome results of `get_contacts` are pooled over all chromosomes,
# summarised per distance bin (nanmean / nanstd / count) and pickled to one file
# per (threshold, cell type).
#
# NOTE(review): `collections`, `enh_gene_id_to_binidx` and `get_contacts` are not
# imported by name in this notebook; presumably they come from the star imports
# in the first cell -- confirm.

# ---------------------------------------------------------------------------
# Inputs that do not depend on the liftOver threshold: loaded/derived ONCE here
# instead of once per `min_match` iteration.
# ---------------------------------------------------------------------------

# tss lifted; keep only the part of the gene id before the first '.'
f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/liftover/gencode.vM16.annotation_genes_tss_lifted.bed'
tss_list = pd.read_csv(f, sep='\t', header=None, names=['chr', 'start', 'end', 'gid', 'gene_name'])
tss_list['gid'] = tss_list['gid'].apply(lambda x: x.split('.')[0])

# enh - tss pair tables: linked / correlated pairs, from mC / ATAC
f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/results_new/fig2_metacell_201213_linked_pairs_mc.tsv'
linked_pairs_mc = pd.read_csv(f, sep="\t")

f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/results_new/fig2_metacell_201213_linked_pairs_atac.tsv'
linked_pairs_atac = pd.read_csv(f, sep="\t")

f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/results_new/fig2_metacell_201213_correlated_pairs_mc.tsv'
correlated_pairs_mc = pd.read_csv(f, sep="\t")

f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/results_new/fig2_metacell_201213_correlated_pairs_atac.tsv'
correlated_pairs_atac = pd.read_csv(f, sep="\t")

# Subtract linked from correlated (anti-join): keep only the rows of the
# correlated table that have no match in the linked table. Pairs thus fall into
# non-overlapping quadrants (linked vs correlated; mc vs atac).
# pd.merge without `on` joins on every column the two tables share.
# NOTE(review): if the tables share non-key columns whose values differ between
# the linked and correlated files, rows would not match and nothing would be
# subtracted -- confirm the shared columns are only the pair keys.
correlated_pairs_mc = (pd.merge(linked_pairs_mc, correlated_pairs_mc, how='right', indicator=True)
                         .query('_merge=="right_only"')
                         .drop('_merge', axis=1)
                      )
correlated_pairs_atac = (pd.merge(linked_pairs_atac, correlated_pairs_atac, how='right', indicator=True)
                         .query('_merge=="right_only"')
                         .drop('_merge', axis=1)
                      )

# "both" = pairs present in the mc AND the atac table (inner join on the pair keys).
# correlated_pairs_both is built from the already-subtracted correlated tables.
linked_pairs_both = pd.merge(linked_pairs_mc, linked_pairs_atac, on=['gene', 'enh', 'dist'])
correlated_pairs_both = pd.merge(correlated_pairs_mc, correlated_pairs_atac, on=['gene', 'enh', 'dist'])

# The six pair sets, in the order in which they are processed and stored.
pair_tables = collections.OrderedDict([
    ('linked_mc', linked_pairs_mc),
    ('linked_atac', linked_pairs_atac),
    ('linked_both', linked_pairs_both),
    ('correlated_mc', correlated_pairs_mc),
    ('correlated_atac', correlated_pairs_atac),
    ('correlated_both', correlated_pairs_both),
])

# A gene counts as active in a cell type when its rank there exceeds this value.
active_th = 0.5

# distances: bins 1 .. distance_cutoff/resolution, expressed in units of `resolution`
distance_cutoff = 1e5
distance_idx = np.arange(1, int(distance_cutoff/resolution)+1)
distances = distance_idx * resolution

# ---------------------------------------------------------------------------
# Main loops: liftOver threshold -> cell type -> chromosome
# ---------------------------------------------------------------------------
for min_match in tqdm.tqdm(min_matches):
    print("**********{}".format(min_match))

    # The threshold is filled in now; the four remaining '{}' slots are filled
    # later with (int(100*active_th), celltype, resolution, today).
    output_res_format = ('/sphere/fangming/enhancers/scripts/results_new/'
                  'hic_contact_enrichment_metacell_liftover{:.2f}_celltypespec_{{}}_{{}}_{{}}_{{}}.pkl'
                  .format(min_match)
                 )

    # enhancers lifted with this threshold (the only threshold-dependent input)
    f = '/cndd2/fangming/projects/scf_enhancers/enhancer_metacells_r100_n935/liftover/enhs_list_reformat_minmatch_{:.2f}_lifted.bed'.format(min_match)
    enh_list = pd.read_csv(f, sep='\t', header=None, names=['chr', 'start', 'end', 'enh_id']).astype({'chr': str,
                                                                                                      'start': int,
                                                                                                      'end': int,
                                                                                                      'enh_id': int,
                                                                                                     })
    enh_list['center'] = (enh_list['start'] + enh_list['end'])/2

    for celltype in celltypes:
        print(celltype)

        # One cooler handle per cell type; individual chromosomes are fetched from it below.
        resource_string = (
             '/sphere/fangming/enhancers/data_m3c/{}_all_brain.txt_1kb_contacts.mcool'
             '::/resolutions/{}'.format(celltype, resolution)
             )
        c = cooler.Cooler(resource_string)

        # Only include a gene if it is actively expressed in this cell type
        # (rank across cell types > active_th). Depends on the cell type only,
        # so it is computed once here rather than per chromosome.
        active_genes = gene_rna_clsts_broad_rank[gene_rna_clsts_broad_rank[celltype]>active_th].index.values
        tss_active = tss_list[tss_list['gid'].isin(active_genes)]

        # Per-distance-bin accumulators, pooled over all chromosomes.
        contacts = [[] for _ in distances]
        contacts_sig_pairs = collections.OrderedDict(
            (name, [[] for _ in distances]) for name in pair_tables
        )

        for chromosome in chromosomes:
            print(celltype, resolution, chromosome)

            # read in the (balanced) matrix of this chromosome
            mat_all = c.matrix(balance=True).fetch(chromosome)

            # all enhancers and active-gene tss on that chromosome
            enhs = enh_list[enh_list['chr']==chromosome]
            tsss = tss_active[tss_active['chr']==chromosome]

            # get paired bins for each of the six pair sets
            paired_bin_tables = collections.OrderedDict(
                (name, enh_gene_id_to_binidx(table, tsss, enhs, resolution))
                for name, table in pair_tables.items()
            )
            # Not used in this cell; kept in case later cells read it.
            track_names = list(paired_bin_tables.keys())

            # contact stats: one list of values per distance bin
            (_contacts, _contacts_sig_pairs
            ) = get_contacts(mat_all, paired_bin_tables, resolution, distance_idx)

            # combine all chromosomes
            for idx in range(len(distances)):
                contacts[idx] += list(_contacts[idx])
                for key in _contacts_sig_pairs.keys():
                    contacts_sig_pairs[key][idx] += list(_contacts_sig_pairs[key][idx])

        # stats per distance bin (NaN-aware mean/std; n counts every pooled value)
        contacts_mean = np.array([np.nanmean(val) for val in contacts])
        contacts_std = np.array([np.nanstd(val) for val in contacts])
        contacts_n = np.array([len(val) for val in contacts])

        contacts_sig_pairs_mean = collections.OrderedDict(
            (key, np.array([np.nanmean(val) for val in vals]))
            for key, vals in contacts_sig_pairs.items()
        )
        contacts_sig_pairs_std = collections.OrderedDict(
            (key, np.array([np.nanstd(val) for val in vals]))
            for key, vals in contacts_sig_pairs.items()
        )
        contacts_sig_pairs_n = collections.OrderedDict(
            (key, np.array([len(val) for val in vals]))
            for key, vals in contacts_sig_pairs.items()
        )

        # output file for this (threshold, cell type)
        output_res = output_res_format.format(int(100*active_th), celltype, resolution, today)
        print(output_res)

        # save results as one tuple:
        #   distances, contacts_mean, contacts_std, contacts_n  -> numpy arrays
        #   contacts_sig_pairs_mean / _std / _n                 -> OrderedDicts of numpy arrays
        with open(output_res, 'wb') as fwh:
            pickle.dump((distances,
                         contacts_mean, contacts_std, contacts_n,
                         contacts_sig_pairs_mean,
                         contacts_sig_pairs_std,
                         contacts_sig_pairs_n,
                        ), fwh)

  0%|          | 0/1 [00:00<?, ?it/s]

**********0.8
L2_3
L2_3 10000 chr1



100%|██████████| 10/10 [00:00<00:00, 265.03it/s]


L2_3 10000 chr2



100%|██████████| 10/10 [00:00<00:00, 279.32it/s]


L2_3 10000 chr3



100%|██████████| 10/10 [00:00<00:00, 314.47it/s]


L2_3 10000 chr4



100%|██████████| 10/10 [00:00<00:00, 325.38it/s]


L2_3 10000 chr5



100%|██████████| 10/10 [00:00<00:00, 312.91it/s]


L2_3 10000 chr6



100%|██████████| 10/10 [00:00<00:00, 334.54it/s]


L2_3 10000 chr7



100%|██████████| 10/10 [00:00<00:00, 338.33it/s]


L2_3 10000 chr8



100%|██████████| 10/10 [00:00<00:00, 339.09it/s]


L2_3 10000 chr9



100%|██████████| 10/10 [00:00<00:00, 359.64it/s]


L2_3 10000 chr10



100%|██████████| 10/10 [00:00<00:00, 347.53it/s]


L2_3 10000 chr11



100%|██████████| 10/10 [00:00<00:00, 335.50it/s]


L2_3 10000 chr12



100%|██████████| 10/10 [00:00<00:00, 336.88it/s]


L2_3 10000 chr13



100%|██████████| 10/10 [00:00<00:00, 382.33it/s]


L2_3 10000 chr14



100%|██████████| 10/10 [00:00<00:00, 371.56it/s]


L2_3 10000 chr15



100%|██████████| 10/10 [00:00<00:00, 359.62it/s]


L2_3 10000 chr16



100%|██████████| 10/10 [00:00<00:00, 395.28it/s]


L2_3 10000 chr17



100%|██████████| 10/10 [00:00<00:00, 355.18it/s]


L2_3 10000 chr18



100%|██████████| 10/10 [00:00<00:00, 412.62it/s]


L2_3 10000 chr19



100%|██████████| 10/10 [00:00<00:00, 520.30it/s]


L2_3 10000 chr20



100%|██████████| 10/10 [00:00<00:00, 407.68it/s]


L2_3 10000 chr21



100%|██████████| 10/10 [00:00<00:00, 621.00it/s]


L2_3 10000 chr22



100%|██████████| 10/10 [00:00<00:00, 404.73it/s]


L2_3 10000 chrX



100%|██████████| 10/10 [00:00<00:00, 424.03it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_liftover0.80_celltypespec_50_L2_3_10000_2020-12-17.pkl
L4
L4 10000 chr1



100%|██████████| 10/10 [00:00<00:00, 277.22it/s]


L4 10000 chr2



100%|██████████| 10/10 [00:00<00:00, 294.43it/s]


L4 10000 chr3



100%|██████████| 10/10 [00:00<00:00, 312.85it/s]


L4 10000 chr4



100%|██████████| 10/10 [00:00<00:00, 320.84it/s]


L4 10000 chr5



100%|██████████| 10/10 [00:00<00:00, 305.14it/s]


L4 10000 chr6



100%|██████████| 10/10 [00:00<00:00, 332.27it/s]


L4 10000 chr7



100%|██████████| 10/10 [00:00<00:00, 340.97it/s]


L4 10000 chr8



100%|██████████| 10/10 [00:00<00:00, 357.23it/s]


L4 10000 chr9



100%|██████████| 10/10 [00:00<00:00, 352.93it/s]


L4 10000 chr10



100%|██████████| 10/10 [00:00<00:00, 348.66it/s]


L4 10000 chr11



100%|██████████| 10/10 [00:00<00:00, 338.64it/s]


L4 10000 chr12



100%|██████████| 10/10 [00:00<00:00, 340.50it/s]


L4 10000 chr13



100%|██████████| 10/10 [00:00<00:00, 392.48it/s]


L4 10000 chr14



100%|██████████| 10/10 [00:00<00:00, 385.51it/s]


L4 10000 chr15



100%|██████████| 10/10 [00:00<00:00, 373.10it/s]


L4 10000 chr16



100%|██████████| 10/10 [00:00<00:00, 399.84it/s]


L4 10000 chr17



100%|██████████| 10/10 [00:00<00:00, 360.91it/s]


L4 10000 chr18



100%|██████████| 10/10 [00:00<00:00, 402.26it/s]


L4 10000 chr19



100%|██████████| 10/10 [00:00<00:00, 523.57it/s]


L4 10000 chr20



100%|██████████| 10/10 [00:00<00:00, 401.54it/s]


L4 10000 chr21



100%|██████████| 10/10 [00:00<00:00, 548.77it/s]


L4 10000 chr22



100%|██████████| 10/10 [00:00<00:00, 436.85it/s]


L4 10000 chrX



100%|██████████| 10/10 [00:00<00:00, 453.98it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_liftover0.80_celltypespec_50_L4_10000_2020-12-17.pkl
L5
L5 10000 chr1



100%|██████████| 10/10 [00:00<00:00, 273.11it/s]


L5 10000 chr2



100%|██████████| 10/10 [00:00<00:00, 292.51it/s]


L5 10000 chr3



100%|██████████| 10/10 [00:00<00:00, 314.66it/s]


L5 10000 chr4



100%|██████████| 10/10 [00:00<00:00, 328.31it/s]


L5 10000 chr5



100%|██████████| 10/10 [00:00<00:00, 312.76it/s]


L5 10000 chr6



100%|██████████| 10/10 [00:00<00:00, 342.63it/s]


L5 10000 chr7



100%|██████████| 10/10 [00:00<00:00, 342.34it/s]


L5 10000 chr8



100%|██████████| 10/10 [00:00<00:00, 363.45it/s]


L5 10000 chr9



100%|██████████| 10/10 [00:00<00:00, 354.02it/s]


L5 10000 chr10



100%|██████████| 10/10 [00:00<00:00, 340.89it/s]


L5 10000 chr11



100%|██████████| 10/10 [00:00<00:00, 335.53it/s]


L5 10000 chr12



100%|██████████| 10/10 [00:00<00:00, 340.09it/s]


L5 10000 chr13



100%|██████████| 10/10 [00:00<00:00, 414.04it/s]


L5 10000 chr14



100%|██████████| 10/10 [00:00<00:00, 360.64it/s]


L5 10000 chr15



100%|██████████| 10/10 [00:00<00:00, 376.01it/s]


L5 10000 chr16



100%|██████████| 10/10 [00:00<00:00, 390.25it/s]


L5 10000 chr17



100%|██████████| 10/10 [00:00<00:00, 306.92it/s]


L5 10000 chr18



100%|██████████| 10/10 [00:00<00:00, 416.28it/s]


L5 10000 chr19



100%|██████████| 10/10 [00:00<00:00, 510.37it/s]


L5 10000 chr20



100%|██████████| 10/10 [00:00<00:00, 402.54it/s]


L5 10000 chr21



100%|██████████| 10/10 [00:00<00:00, 564.02it/s]


L5 10000 chr22



100%|██████████| 10/10 [00:00<00:00, 461.37it/s]


L5 10000 chrX



100%|██████████| 10/10 [00:00<00:00, 444.83it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_liftover0.80_celltypespec_50_L5_10000_2020-12-17.pkl
L6
L6 10000 chr1



100%|██████████| 10/10 [00:00<00:00, 281.70it/s]


L6 10000 chr2



100%|██████████| 10/10 [00:00<00:00, 291.84it/s]


L6 10000 chr3



100%|██████████| 10/10 [00:00<00:00, 312.63it/s]


L6 10000 chr4



100%|██████████| 10/10 [00:00<00:00, 329.78it/s]


L6 10000 chr5



100%|██████████| 10/10 [00:00<00:00, 316.00it/s]


L6 10000 chr6



100%|██████████| 10/10 [00:00<00:00, 329.93it/s]


L6 10000 chr7



100%|██████████| 10/10 [00:00<00:00, 346.71it/s]


L6 10000 chr8



100%|██████████| 10/10 [00:00<00:00, 347.72it/s]


L6 10000 chr9



100%|██████████| 10/10 [00:00<00:00, 352.22it/s]


L6 10000 chr10



100%|██████████| 10/10 [00:00<00:00, 347.78it/s]


L6 10000 chr11



100%|██████████| 10/10 [00:00<00:00, 338.67it/s]


L6 10000 chr12



100%|██████████| 10/10 [00:00<00:00, 145.67it/s]


L6 10000 chr13



100%|██████████| 10/10 [00:00<00:00, 452.31it/s]


L6 10000 chr14



100%|██████████| 10/10 [00:00<00:00, 370.36it/s]


L6 10000 chr15



100%|██████████| 10/10 [00:00<00:00, 378.97it/s]


L6 10000 chr16



100%|██████████| 10/10 [00:00<00:00, 396.36it/s]


L6 10000 chr17



100%|██████████| 10/10 [00:00<00:00, 361.85it/s]


L6 10000 chr18



100%|██████████| 10/10 [00:00<00:00, 418.52it/s]


L6 10000 chr19



100%|██████████| 10/10 [00:00<00:00, 556.99it/s]


L6 10000 chr20



100%|██████████| 10/10 [00:00<00:00, 415.47it/s]


L6 10000 chr21



100%|██████████| 10/10 [00:00<00:00, 525.00it/s]


L6 10000 chr22



100%|██████████| 10/10 [00:00<00:00, 447.86it/s]


L6 10000 chrX



100%|██████████| 10/10 [00:00<00:00, 429.06it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_liftover0.80_celltypespec_50_L6_10000_2020-12-17.pkl
Pvalb
Pvalb 10000 chr1



100%|██████████| 10/10 [00:00<00:00, 272.97it/s]


Pvalb 10000 chr2



100%|██████████| 10/10 [00:00<00:00, 304.13it/s]


Pvalb 10000 chr3



100%|██████████| 10/10 [00:00<00:00, 308.82it/s]


Pvalb 10000 chr4



100%|██████████| 10/10 [00:00<00:00, 323.99it/s]


Pvalb 10000 chr5



100%|██████████| 10/10 [00:00<00:00, 316.87it/s]


Pvalb 10000 chr6



100%|██████████| 10/10 [00:00<00:00, 348.53it/s]


Pvalb 10000 chr7



100%|██████████| 10/10 [00:00<00:00, 258.98it/s]


Pvalb 10000 chr8



100%|██████████| 10/10 [00:00<00:00, 374.67it/s]


Pvalb 10000 chr9



100%|██████████| 10/10 [00:00<00:00, 353.94it/s]


Pvalb 10000 chr10



100%|██████████| 10/10 [00:00<00:00, 352.21it/s]


Pvalb 10000 chr11



100%|██████████| 10/10 [00:00<00:00, 347.49it/s]


Pvalb 10000 chr12



100%|██████████| 10/10 [00:00<00:00, 331.23it/s]


Pvalb 10000 chr13



100%|██████████| 10/10 [00:00<00:00, 423.65it/s]


Pvalb 10000 chr14



100%|██████████| 10/10 [00:00<00:00, 449.81it/s]


Pvalb 10000 chr15



100%|██████████| 10/10 [00:00<00:00, 386.87it/s]


Pvalb 10000 chr16



100%|██████████| 10/10 [00:00<00:00, 395.14it/s]


Pvalb 10000 chr17



100%|██████████| 10/10 [00:00<00:00, 369.09it/s]


Pvalb 10000 chr18



100%|██████████| 10/10 [00:00<00:00, 419.69it/s]


Pvalb 10000 chr19



100%|██████████| 10/10 [00:00<00:00, 256.29it/s]


Pvalb 10000 chr20



100%|██████████| 10/10 [00:00<00:00, 432.29it/s]


Pvalb 10000 chr21



100%|██████████| 10/10 [00:00<00:00, 637.99it/s]


Pvalb 10000 chr22



100%|██████████| 10/10 [00:00<00:00, 190.06it/s]


Pvalb 10000 chrX



100%|██████████| 10/10 [00:00<00:00, 512.37it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_liftover0.80_celltypespec_50_Pvalb_10000_2020-12-17.pkl
Sst
Sst 10000 chr1



100%|██████████| 10/10 [00:00<00:00, 276.05it/s]


Sst 10000 chr2



100%|██████████| 10/10 [00:00<00:00, 289.66it/s]


Sst 10000 chr3



100%|██████████| 10/10 [00:00<00:00, 315.01it/s]


Sst 10000 chr4



100%|██████████| 10/10 [00:00<00:00, 352.53it/s]


Sst 10000 chr5



100%|██████████| 10/10 [00:00<00:00, 240.33it/s]


Sst 10000 chr6



100%|██████████| 10/10 [00:00<00:00, 356.95it/s]


Sst 10000 chr7



100%|██████████| 10/10 [00:00<00:00, 371.83it/s]


Sst 10000 chr8



100%|██████████| 10/10 [00:00<00:00, 371.58it/s]


Sst 10000 chr9



100%|██████████| 10/10 [00:00<00:00, 350.36it/s]


Sst 10000 chr10



100%|██████████| 10/10 [00:00<00:00, 401.74it/s]


Sst 10000 chr11



100%|██████████| 10/10 [00:00<00:00, 151.83it/s]


Sst 10000 chr12



100%|██████████| 10/10 [00:00<00:00, 346.26it/s]


Sst 10000 chr13



100%|██████████| 10/10 [00:00<00:00, 458.16it/s]


Sst 10000 chr14



100%|██████████| 10/10 [00:00<00:00, 359.26it/s]


Sst 10000 chr15



100%|██████████| 10/10 [00:00<00:00, 447.29it/s]


Sst 10000 chr16



100%|██████████| 10/10 [00:00<00:00, 391.76it/s]


Sst 10000 chr17



100%|██████████| 10/10 [00:00<00:00, 167.51it/s]


Sst 10000 chr18



100%|██████████| 10/10 [00:00<00:00, 425.03it/s]


Sst 10000 chr19



100%|██████████| 10/10 [00:00<00:00, 587.16it/s]


Sst 10000 chr20



100%|██████████| 10/10 [00:00<00:00, 430.91it/s]


Sst 10000 chr21



100%|██████████| 10/10 [00:00<00:00, 678.25it/s]


Sst 10000 chr22



100%|██████████| 10/10 [00:00<00:00, 493.23it/s]


Sst 10000 chrX



100%|██████████| 10/10 [00:00<00:00, 459.31it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_liftover0.80_celltypespec_50_Sst_10000_2020-12-17.pkl
Vip
Vip 10000 chr1



100%|██████████| 10/10 [00:00<00:00, 283.58it/s]


Vip 10000 chr2



100%|██████████| 10/10 [00:00<00:00, 297.47it/s]


Vip 10000 chr3



100%|██████████| 10/10 [00:00<00:00, 321.27it/s]


Vip 10000 chr4



100%|██████████| 10/10 [00:00<00:00, 334.82it/s]


Vip 10000 chr5



100%|██████████| 10/10 [00:00<00:00, 320.42it/s]


Vip 10000 chr6



100%|██████████| 10/10 [00:00<00:00, 340.01it/s]


Vip 10000 chr7



100%|██████████| 10/10 [00:00<00:00, 350.64it/s]


Vip 10000 chr8



100%|██████████| 10/10 [00:00<00:00, 165.55it/s]


Vip 10000 chr9



100%|██████████| 10/10 [00:00<00:00, 368.40it/s]


Vip 10000 chr10



100%|██████████| 10/10 [00:00<00:00, 159.81it/s]


Vip 10000 chr11



100%|██████████| 10/10 [00:00<00:00, 359.34it/s]


Vip 10000 chr12



100%|██████████| 10/10 [00:00<00:00, 364.40it/s]


Vip 10000 chr13



100%|██████████| 10/10 [00:00<00:00, 414.26it/s]


Vip 10000 chr14



100%|██████████| 10/10 [00:00<00:00, 473.80it/s]


Vip 10000 chr15



100%|██████████| 10/10 [00:00<00:00, 414.06it/s]


Vip 10000 chr16



100%|██████████| 10/10 [00:00<00:00, 387.16it/s]


Vip 10000 chr17



100%|██████████| 10/10 [00:00<00:00, 377.36it/s]


Vip 10000 chr18



100%|██████████| 10/10 [00:00<00:00, 449.78it/s]


Vip 10000 chr19



100%|██████████| 10/10 [00:00<00:00, 586.61it/s]


Vip 10000 chr20



100%|██████████| 10/10 [00:00<00:00, 473.41it/s]


Vip 10000 chr21



100%|██████████| 10/10 [00:00<00:00, 580.52it/s]


Vip 10000 chr22



100%|██████████| 10/10 [00:00<00:00, 448.57it/s]


Vip 10000 chrX



100%|██████████| 10/10 [00:00<00:00, 467.23it/s]


/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_liftover0.80_celltypespec_50_Vip_10000_2020-12-17.pkl
Ndnf
Ndnf 10000 chr1



100%|██████████| 10/10 [00:00<00:00, 281.23it/s]


Ndnf 10000 chr2



100%|██████████| 10/10 [00:00<00:00, 217.66it/s]


Ndnf 10000 chr3



100%|██████████| 10/10 [00:00<00:00, 315.65it/s]


Ndnf 10000 chr4



100%|██████████| 10/10 [00:00<00:00, 369.27it/s]


Ndnf 10000 chr5



100%|██████████| 10/10 [00:00<00:00, 319.80it/s]


Ndnf 10000 chr6



100%|██████████| 10/10 [00:00<00:00, 344.44it/s]


Ndnf 10000 chr7



100%|██████████| 10/10 [00:00<00:00, 355.78it/s]


Ndnf 10000 chr8



100%|██████████| 10/10 [00:00<00:00, 373.33it/s]


Ndnf 10000 chr9



100%|██████████| 10/10 [00:00<00:00, 358.37it/s]


Ndnf 10000 chr10



100%|██████████| 10/10 [00:00<00:00, 356.16it/s]


Ndnf 10000 chr11



100%|██████████| 10/10 [00:00<00:00, 347.64it/s]


Ndnf 10000 chr12



100%|██████████| 10/10 [00:00<00:00, 342.79it/s]


Ndnf 10000 chr13



100%|██████████| 10/10 [00:00<00:00, 429.43it/s]


Ndnf 10000 chr14



100%|██████████| 10/10 [00:00<00:00, 369.32it/s]


Ndnf 10000 chr15



100%|██████████| 10/10 [00:00<00:00, 167.24it/s]


Ndnf 10000 chr16



100%|██████████| 10/10 [00:00<00:00, 425.83it/s]


Ndnf 10000 chr17



100%|██████████| 10/10 [00:00<00:00, 362.64it/s]


Ndnf 10000 chr18



100%|██████████| 10/10 [00:00<00:00, 411.24it/s]


Ndnf 10000 chr19



100%|██████████| 10/10 [00:00<00:00, 648.14it/s]


Ndnf 10000 chr20



100%|██████████| 10/10 [00:00<00:00, 414.81it/s]


Ndnf 10000 chr21



100%|██████████| 10/10 [00:00<00:00, 695.68it/s]


Ndnf 10000 chr22



100%|██████████| 10/10 [00:00<00:00, 440.13it/s]


Ndnf 10000 chrX



100%|██████████| 10/10 [00:00<00:00, 511.23it/s]
100%|██████████| 1/1 [05:27<00:00, 327.99s/it]

/sphere/fangming/enhancers/scripts/results_new/hic_contact_enrichment_metacell_liftover0.80_celltypespec_50_Ndnf_10000_2020-12-17.pkl



