In [1]:
import glob
import os
import sys
import tempfile
import shutil
from ast import literal_eval
from tqdm import tqdm

import anndata
from ete3 import Tree
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.cluster import hierarchy

sys.path.insert(0, '/lab/solexa_weissman/kmin/git/KPTracer-release/cassiopeia-kp')
sys.path.append('/lab/solexa_weissman/kmin/kp_infercnv/NT')
from utilities import plot_tree_itol
from utilities import clonal_expansions

In [2]:
# iTOL batch-upload credentials and output location.
# The API key is read from the ITOL_API_KEY environment variable so that a real
# key never has to be typed into (and committed with) the notebook; when the
# variable is unset this falls back to the empty string used previously.
apiKey = os.environ.get('ITOL_API_KEY', '')
projectName = 'KP_Trees_Joseph' ## Specify the project name
# Directory (relative to the notebook) that receives the exported tree PDFs.
plot_dir = 'plots'

In [3]:
# Render the Cassiopeia tree for one tumor on iTOL, with a color strip showing
# the per-cell CNV count and the clonal-expansion clades highlighted in red.
target = '3730_NT_T2'
print(target)
# Kept as a string: it is interpolated verbatim into the `Pnorm_<threshold>`
# file name below and selects the `_0.2` suffix of the input directory.
threshold = '0.2'
out_dir = f'{target}_grouped{"_0.2" if threshold == "0.2" else ""}'

# Load CNVs
df_regions = pd.read_csv(f'{out_dir}/HMM_CNV_predictions.HMMi6.rand_trees.hmm_mode-subclusters.Pnorm_{threshold}.pred_cnv_regions.dat', sep='\t')
df_cells = pd.read_csv(f'{out_dir}/17_HMM_predHMMi6.rand_trees.hmm_mode-subclusters.cell_groupings', sep='\t')

# Load Cassiopeia tree
df_model = pd.read_csv('/lab/solexa_weissman/mgjones/projects/kptc/trees/tumor_model.txt', sep='\t', index_col=0)
tree_cass = Tree(f'/lab/solexa_weissman/mgjones/projects/kptc/trees/{target}/{df_model.loc[target]["Newick"]}', format=1)

# Load expansions: one file per expansion, each contributing the list of values
# found in its '0' column (presumably leaf/cell names -- they are passed to
# `get_common_ancestor` below; confirm against the file format).
expansions = []
for expansion_path in glob.glob(f'/lab/solexa_weissman/mgjones/projects/kptc/trees/{target}/clonal_expansions.{target}.expansion*.txt'):
    expansions.append(list(pd.read_csv(expansion_path, sep='\t', index_col=0)['0']))

# Cells present in both trees
cells = set(df_cells['cell']).intersection(tree_cass.get_leaf_names())

df_cells = df_cells[df_cells['cell'].isin(cells)]
# Number of (region, cell) rows per cell after joining regions to cells through
# their shared cell group. Cells with no joined row are absent from the dict
# and are treated as 0 when colors are assigned.
cnv_counts = (
    pd.merge(df_regions, df_cells, on='cell_group_name')[['cell', 'cnv_name']]
    .groupby('cell')
    .size()
    .to_dict()
)
# Counts are clipped to this value before being scaled to [0, 1] for the
# colormap. NOTE(review): 17 is hard-coded -- presumably a fixed scale shared
# across samples; confirm before reusing with another target.
max_count = 17

cmap = mpl.cm.viridis
colors = {
    cell: mpl.colors.to_hex(cmap(min(cnv_counts.get(cell, 0), max_count) / max_count))
    for cell in cells
}

# Make sure the destination of the exported PDF exists before uploading, so a
# missing directory cannot cause a failure after the upload has gone through.
if plot_dir:
    os.makedirs(plot_dir, exist_ok=True)

files = []
temp_dir = tempfile.mkdtemp()
try:
    # Color strip: one `<cell>\t<hex color>` line per cell.
    path = os.path.join(temp_dir, 'colorstrip.txt')
    header = 'DATASET_COLORSTRIP\nSEPARATOR TAB\nCOLOR\t#000000\nDATASET_LABEL\tcnv_count\nSTRIP_WIDTH\t100\nMARGIN\t80\nSHOW_INTERNAL\t0\nDATA\n\n'
    with open(path, 'w') as f:
        f.write(header)
        for cell, color in colors.items():
            f.write(f'{cell}\t{color}\n')
    files.append(path)

    # Style for expansions
    path = os.path.join(temp_dir, 'styles.txt')
    header = 'DATASET_STYLE\nSEPARATOR TAB\nDATASET_LABEL\tstyles\nCOLOR\t#000000\nDATA\n\n'
    with open(path, 'w') as f:
        f.write(header)
        for expansion in expansions:
            # The clade is addressed by the name of the expansion's common
            # ancestor. NOTE(review): this relies on internal nodes carrying
            # names in the Newick file (it is parsed with format=1) -- confirm.
            ancestor = tree_cass.get_common_ancestor(*expansion)
            f.write(f'{ancestor.name}\tbranch\tclade\t#FF0000\t1\tnormal\n')
    files.append(path)

    plot_tree_itol.upload_to_itol(
        tree_cass,
        apiKey,
        projectName,
        target,
        files=files,
        outfp=os.path.join(plot_dir, f'{target}_cnv_count.pdf'),
        rect=False,
        line_width=8,
    )
finally:
    # Always remove the scratch directory, including when writing the
    # annotation files or the upload raises; cleanup itself stays best-effort.
    shutil.rmtree(temp_dir, ignore_errors=True)

3730_NT_T2
iTOL output: SUCCESS: 1841168140801639431876

Tree Web Page URL: http://itol.embl.de/external.cgi?tree=1841168140801639431876&restore_saved=1


In [5]:
from IPython.display import IFrame

# Embed the PDF written for `target` so the rendered tree is visible inline.
pdf_path = os.path.join(plot_dir, f'{target}_cnv_count.pdf')
IFrame(pdf_path, width=500, height=500)