In [1]:
import itertools
import os

from doit.tools import register_doit_as_IPython_magic
register_doit_as_IPython_magic()

from gamtools import cosegregation
from gam_follow_up import config

In [2]:
def get_matrix_task(segregation_file, basename,
                    matrix_type='dprime', output_format='txt.gz',
                    final_dir=None, chroms=None):
    """Build a doit task generator that makes one contact matrix per chromosome.

    Parameters
    ----------
    segregation_file : str
        Path of the segregation table. It is handed to gamtools and listed
        as the ``file_dep`` of every generated sub-task.
    basename : str
        Stored under the ``basename`` key of every generated task dict.
    matrix_type : str, optional
        Forwarded unchanged to ``cosegregation.get_output_file`` and
        ``cosegregation.create_and_save_contact_matrix``.
    output_format : str, optional
        Forwarded unchanged to the same two gamtools functions.
    final_dir : str or None, optional
        When given, each matrix is written into this directory, keeping the
        file name chosen by ``cosegregation.get_output_file``. When ``None``
        the path returned by ``cosegregation.get_output_file`` is used as is.
    chroms : iterable of str or None, optional
        Chromosome names to generate one sub-task for. ``None`` (the default)
        keeps the previous hard-coded behaviour: ``chr1`` through ``chr19``.

    Returns
    -------
    function
        A zero-argument generator function yielding one task dictionary
        (``name``, ``basename``, ``targets``, ``file_dep``, ``actions``,
        ``verbosity``) per chromosome.
    """

    if chroms is None:
        chroms = ['chr{}'.format(c) for c in range(1, 20)]
    else:
        # Materialise the iterable: the returned generator function may be
        # called more than once, and a one-shot iterator would be exhausted
        # after the first call.
        chroms = list(chroms)

    def internal_task_function():

        for chrom in chroms:

            # gamtools is given the region as a one-element list.
            region = [chrom]

            intermediate_output_file = cosegregation.get_output_file(
                segregation_file, region, matrix_type, output_format)

            if final_dir is not None:
                # Keep gamtools' file name but relocate it into final_dir.
                final_output_file = os.path.join(
                    final_dir, os.path.basename(intermediate_output_file))
            else:
                final_output_file = intermediate_output_file

            yield {
                'name': chrom,
                'basename': basename,
                'targets': [final_output_file],
                'file_dep': [segregation_file],
                'actions': [(cosegregation.create_and_save_contact_matrix,
                             (segregation_file, region,
                              final_output_file, output_format, matrix_type))],
                'verbosity': 2
            }

    return internal_task_function

In [3]:
# Path templates for each dataset's segregation table, keyed by dataset name.
# Each template is later completed with str.format(resolution=...), so the
# path returned by config is expected to keep the '{resolution}' placeholder.
segregation_tables = {}
segregation_tables['3np'] = config.in_data_raw(
    'multiplex-gam-data/gamtools_output/segregation_at_{resolution}.passed_qc.table')
segregation_tables['1np'] = config.in_data_raw(
    '1np-gam-data/gamtools_output/segregation_at_{resolution}.passed_qc.table')
segregation_tables['combined'] = config.in_data_processed(
    'segregation-tables/combined_segregation_at_{resolution}.table')

# Resolutions substituted into the templates above.
resolutions = [
    '40kb',
    '250kb',
    '1Mb',
]

In [4]:
# Build one task generator for every (dataset, resolution) combination.
dict_of_tasks = {}

for dataset in segregation_tables:
    for resolution in resolutions:
        # Stop immediately if an expected input table is absent.
        seg_table = segregation_tables[dataset].format(resolution=resolution)
        if not os.path.exists(seg_table):
            raise Exception('No such file {}!'.format(seg_table))

        task_name = 'task_{}_matrices_at_{}_resolution'.format(dataset, resolution)

        # The placeholders are filled in after config has built the path.
        matrix_dir = config.in_data_processed(
            'contact-matrices/{dataset}/{resolution}')
        matrix_dir = matrix_dir.format(dataset=dataset, resolution=resolution)
        if not os.path.exists(matrix_dir):
            os.makedirs(matrix_dir)

        dict_of_tasks[task_name] = get_matrix_task(
            seg_table, task_name, final_dir=matrix_dir)

# Publish every generator under its 'task_...' name in the notebook's global
# namespace (presumably so the %doit magic can discover them -- confirm
# against the doit loader documentation).
globals().update(dict_of_tasks)

In [7]:
# Invoke doit through the IPython magic registered in the import cell.
# NOTE(review): `-n 5` is doit's option for the number of parallel worker
# processes -- confirm against the doit command-line documentation.
%doit -n 5

-- task_1np_matrices_at_40kb_resolution:chr1
-- task_1np_matrices_at_40kb_resolution:chr2
-- task_1np_matrices_at_40kb_resolution:chr3
-- task_1np_matrices_at_40kb_resolution:chr4
-- task_1np_matrices_at_40kb_resolution:chr5
-- task_1np_matrices_at_40kb_resolution:chr6
-- task_1np_matrices_at_40kb_resolution:chr7
-- task_1np_matrices_at_40kb_resolution:chr8
-- task_1np_matrices_at_40kb_resolution:chr9
-- task_1np_matrices_at_40kb_resolution:chr10
-- task_1np_matrices_at_40kb_resolution:chr11
-- task_1np_matrices_at_40kb_resolution:chr12
-- task_1np_matrices_at_40kb_resolution:chr13
-- task_1np_matrices_at_40kb_resolution:chr14
-- task_1np_matrices_at_40kb_resolution:chr15
-- task_1np_matrices_at_40kb_resolution:chr16
-- task_1np_matrices_at_40kb_resolution:chr17
-- task_1np_matrices_at_40kb_resolution:chr18
-- task_1np_matrices_at_40kb_resolution:chr19
-- task_combined_matrices_at_40kb_resolution:chr1
-- task_combined_matrices_at_40kb_resolution:chr2
-- task_combined_matrices_at_40kb_r