In [None]:
import sys
import logging
import dbclients
import scgenome.utils

from scgenome.loaders.qc import load_cached_qc_data
from scgenome.db.qc import cache_qc_results
from scgenome.analyses.infer_clones import load_cell_cycle_data

# Route INFO-and-above log records to stderr, each prefixed with the time
# and the level name.
LOGGING_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(
    level=logging.INFO,
    stream=sys.stderr,
    format=LOGGING_FORMAT,
)

# Tantalus API client.  Used further down to fetch the hmmcopy analysis
# record for each ticket, and handed to the cell-cycle data loader.
tantalus_api = dbclients.tantalus.TantalusApi()

# Local directory passed to both the QC caching call and the cached-QC
# loader below.
local_cache_directory = '/Users/mcphera1/Scratch/tantalus_data/'

# Jira tickets of the hmmcopy analyses to process.
# Currently disabled alternatives: 'SC-1935', 'SC-1937'.
hmmcopy_tickets = ['SC-1936']

# Sample ids forwarded to the cached-QC loader as its `sample_ids` argument.
# Currently disabled alternatives: 'SA922', 'SA1090'.
sample_ids = ['SA921']

# Keys of the tables taken from the dict returned by load_cached_qc_data,
# in the order they are collected and concatenated:
# reads -> cn_data, segs -> segs_data, metrics -> metrics_data,
# align_metrics -> align_metrics_data.
qc_table_keys = (
    'hmmcopy_reads',
    'hmmcopy_segs',
    'hmmcopy_metrics',
    'align_metrics',
)

# One list of per-ticket tables for every key above.
tables_by_key = {key: [] for key in qc_table_keys}

for jira_ticket in hmmcopy_tickets:
    # Fetch the hmmcopy analysis record registered for this ticket.
    analysis = tantalus_api.get(
        'analysis', analysis_type__name='hmmcopy', jira_ticket=jira_ticket)

    # Cache the ticket's QC results locally, then load them from the cache.
    cache_qc_results(jira_ticket, local_cache_directory)
    hmmcopy_data = load_cached_qc_data(
        jira_ticket, local_cache_directory, sample_ids=sample_ids)

    for key in qc_table_keys:
        tables_by_key[key].append(hmmcopy_data[key])

# Combine the per-ticket tables into a single table per kind.
cn_data, segs_data, metrics_data, align_metrics_data = (
    scgenome.utils.concat_with_categories(tables_by_key[key])
    for key in qc_table_keys
)

# Attach cell-cycle annotations to the metrics table when it does not
# already carry an 'is_s_phase' column.
if 'is_s_phase' not in metrics_data:
    # Load the cell-cycle table of EVERY configured ticket.  Reading the
    # ticket from the `analysis` variable left over by an earlier loop would
    # only cover the last ticket, leaving cells of the other tickets without
    # cell-cycle values after the left merge below.
    cell_cycle_frames = []
    for ticket in hmmcopy_tickets:
        ticket_cell_cycle = load_cell_cycle_data(tantalus_api, ticket)
        ticket_cell_cycle['cell_id'] = (
            ticket_cell_cycle['cell_id'].astype('category'))
        cell_cycle_frames.append(ticket_cell_cycle)

    # Same helper that is used to combine the per-ticket QC tables.
    cell_cycle_data = scgenome.utils.concat_with_categories(cell_cycle_frames)

    # NOTE(review): presumably this gives categorical columns shared by
    # these tables (such as cell_id) a common set of categories so they can
    # be used as merge keys -- confirm against scgenome.utils.union_categories.
    scgenome.utils.union_categories([
        cn_data,
        metrics_data,
        align_metrics_data,
        cell_cycle_data])

    # No `on=` is given, so (assuming pandas DataFrames) the join keys are
    # all columns the two tables have in common.  The left join keeps every
    # row of metrics_data.
    metrics_data = metrics_data.merge(cell_cycle_data, how='left')

    # Sanity check: the merge must not have produced a suffixed duplicate
    # of the cell_id column.
    assert 'cell_id_x' not in metrics_data