In [1]:
import glob
import pathlib
import pandas as pd
import subprocess
from gliderport.preset.notebook import notebook_snakemake

In [2]:
# Root of the mapping output; the pool directories globbed below live under it.
data_dir = '/data/female-amb/AMB-F-mapping'
# Directory holding the reference files (blacklists, chromosome sizes) used by
# the filter-contact step.
ref_dir = '/ref/m3C'

## prepare contact_table.tsv

In [3]:
# Pool names are the entry names found directly under the mapping root.
# Built from `data_dir` (instead of repeating the literal path) so the two can
# never drift apart, and sorted because glob returns entries in arbitrary order.
all_pools = sorted(pathlib.Path(path).name for path in glob.glob(f'{data_dir}/*'))

In [None]:
# Write one two-column, tab-separated, header-less table per selected pool:
#   <cell_id> <TAB> <path to that cell's contact file>
# Only the pools listed here are (re)generated by this cell.
target_pools = ['pool_amb58', 'pool_amb60', 'pool_amb65']

# DataFrame.to_csv does not create parent directories, so make sure tmp/ exists.
pathlib.Path('tmp').mkdir(exist_ok=True)

for _pool in all_pools:
    if _pool not in target_pools:
        continue
    # Sorted so the row order of the table is reproducible across runs
    # (glob order is filesystem dependent).
    hic_files = sorted(glob.glob(f'{data_dir}/{_pool}/*/*/hic/*.all_reads.3C.contact.tsv.gz'))
    # The cell id is the part of the file name before the first '.'.
    cell_id = [pathlib.Path(file).name.split('.')[0] for file in hic_files]
    contact_tables = pd.DataFrame({'cell_id': cell_id, 'hic_files': hic_files})
    # header/index are documented as booleans; False states "omit" explicitly
    # instead of relying on None being falsy.
    contact_tables.to_csv(f'tmp/{_pool}.contact_table.tsv', sep='\t', header=False, index=False)

In [16]:
# Exclude pool_amb34 from the remaining steps.
# The list is rebuilt rather than mutated with list.remove(): re-running this
# cell (or running it when the pool is not present) is then a no-op instead of
# raising ValueError.
all_pools = [pool for pool in all_pools if pool != 'pool_amb34']

## prepare Snakemake

In [3]:
# One group per file in tmp/: the group name is the file name up to its first
# '.', i.e. the pool name for files written as "<pool>.contact_table.tsv".
groups = []
for table_path in glob.glob('tmp/*'):
    table_name = pathlib.Path(table_path).name
    groups.append(table_name.split('.')[0])
groups

['pool_amb60', 'pool_amb58', 'pool_amb65']

In [4]:
# Arguments for gliderport's notebook_snakemake helper, collected in one place
# so they are easy to scan and tweak.
# NOTE(review): the exact meaning of each option is defined by gliderport, not
# by this notebook — confirm against its documentation before changing values.
snakemake_options = dict(
    work_dir='filter_contact',
    notebook_dir='template',
    groups=groups,          # pool names derived from the files in tmp/
    group_files=None,
    default_cpu=20,
    default_mem_gb=20,
    redo_prepare=True,
)
notebook_snakemake(**snakemake_options)

## filter-contact

In [5]:
# Reference files under `ref_dir` that are passed to `hicluster filter-contact`
# further down.
# Chromosome sizes (the file name suggests chrM and chrY are excluded — verify).
chrom_size_path = f'{ref_dir}/mm10.main.nochrM.nochrY.chrom.sizes'
# Blacklist in BED format, passed as --blacklist_1d_path.
blacklist_1d_path = f'{ref_dir}/mm10-blacklist.v2.bed.gz'
# Blacklist in BEDPE format, passed as --blacklist_2d_path.
blacklist_2d_path = f'{ref_dir}/mm10_2d_blacklist.bedpe'

In [6]:
# Full paths of every entry currently present in the filtered-output directory.
# NOTE(review): the following cell rebinds `filtered_list` to the bare entry
# names, so this value is only useful for inspection.
rmbkl_pattern = '/data/female-amb/female-m3c-rmbkl/*'
filtered_list = glob.glob(rmbkl_pattern)

In [7]:
# Names (last path component) of the entries already present in the
# filtered-output directory, i.e. the pools that have been filtered before.
filtered_list = []
for entry in glob.glob('/data/female-amb/female-m3c-rmbkl/*'):
    filtered_list.append(pathlib.Path(entry).name)

In [21]:
# Pools that still need filtering: every known pool that has no entry in the
# filtered-output directory yet.
# This is a set *difference*. The previous symmetric difference (^) also
# returned names that exist only in the filtered directory (e.g. a pool that
# was deliberately removed from `all_pools`, or a stray file), which would then
# be scheduled for filtering. Sorted so the generated script is reproducible.
remained_pools = sorted(set(all_pools) - set(filtered_list))

In [23]:
# Generate gen-rmbkl.sh: one backgrounded `hicluster filter-contact` command per
# remaining pool, followed by a final `wait` so the script only returns once
# every background job has finished.
commands = []
for _pool in remained_pools:
    output_dir = f'female-m3c-rmbkl/{_pool}/'
    contact_table = f'tmp/{_pool}.contact_table.tsv'

    # Remove blacklist
    # The --chr1/--pos1/--chr2/--pos2 values are column indices of the contact
    # files — NOTE(review): confirm the indexing convention against hicluster.
    command = (
        "hicluster filter-contact "
        f"--output_dir {output_dir} "
        f"--blacklist_1d_path {blacklist_1d_path} "
        f"--blacklist_2d_path {blacklist_2d_path} "
        "--chr1 1 "
        "--pos1 2 "
        "--chr2 5 "
        "--pos2 6 "
        f"--contact_table {contact_table} "
        f"--chrom_size_path {chrom_size_path} "
        "--cpu 20"
    )
    commands.append(f"{command} &")

# Write the script in a single pass with mode "w". The previous append mode
# made the cell non-idempotent: every re-run duplicated the commands and left a
# stray `wait` in the middle of the file.
with open('gen-rmbkl.sh', "w") as file:
    for line in commands:
        file.write(f"{line}\n")
    # Trailing newline so the file ends as a well-formed text file.
    file.write('wait\n')