Reconstructing a lineage tree from a mutation table

In [1]:
import os
import sys
sys.path.append('path_to_tmc_wrapper')

from tmc_wrapper.utils import to_dict_no_nans, memory_expensive_random_choose
from tmc_wrapper.TMC_formatting import map_cell_ids_for_sagi, format_triplet, convert_names_in_sagis_newick
from tmc_wrapper.triplet_scoring import choose_best_pair, NoIntersectionLociException

import pandas as pd

In [2]:
# Identifier of the dataset to process. It is substituted into the project
# directory below and into the mutation-table file name when the CSV is loaded.
name = 'SubC3_train_000X'
# NOTE(review): 'path_to_Subchallenge3' looks like a placeholder for the real
# data location -- replace it before running.
project_dir = 'path_to_Subchallenge3/{}'.format(name)

In [3]:
# Load '<name>_mutation_table.csv' from the project directory; the first CSV
# column is used as the row index (index_col=0).
df = pd.read_csv(
    os.path.join(project_dir, '{}_mutation_table.csv'.format(name)),
    index_col=0,
)

In [4]:
# Convert the transposed mutation table into a dict.
# NOTE(review): judging by its name, to_dict_no_nans presumably drops NaN
# entries -- confirm against tmc_wrapper.utils.
d = to_dict_no_nans(df.transpose())

In [5]:
# Build the triplets file consumed by TMC: for every sampled triplet of keys
# from `d`, pick the best-scoring pair and append one formatted record.
cell_id_map_for_sagi = map_cell_ids_for_sagi(d)
triplets_file = '/tmp/triplets_test.txt'
with open(triplets_file, 'w') as f:
    # NOTE(review): presumably yields n=5000 random k=3 subsets of the keys --
    # confirm against tmc_wrapper.utils.memory_expensive_random_choose.
    for triplet in memory_expensive_random_choose(d.keys(), k=3, n=5000):
        try:
            pair, score = choose_best_pair(triplet, d)
        except NoIntersectionLociException:
            # This triplet cannot be scored; leave it out of the file.
            pass
        else:
            f.write(
                format_triplet(
                    triplet, pair, score, cell_id_map_for_sagi,
                    print_scores=True,
                )
            )

In [6]:
import os

class EmptyTripletsFile(Exception):
    """Raised by ``run_TMC`` when the triplets file has a size of zero bytes."""


def run_TMC(triplets_file, output_newick):
    """Run the TMC ``treeFromTriplets`` executable on a triplets file.

    Args:
        triplets_file: Path of the triplets file, passed to TMC as ``-fid``.
        output_newick: Path passed to TMC as ``-frtN``.

    Returns:
        The status value returned by ``os.system`` for the TMC command line.
        A non-zero value means the command did not succeed; this function
        does not raise in that case, so callers should check the result.

    Raises:
        EmptyTripletsFile: If ``triplets_file`` is empty.
        OSError: If ``triplets_file`` cannot be stat'ed (e.g. it is missing).
    """
    # Fail early with a clear error instead of handing TMC an empty input.
    if os.stat(triplets_file).st_size == 0:
        # The placeholder was missing from this message, so the offending
        # path never appeared in the exception text.
        raise EmptyTripletsFile('Empty file: {}'.format(triplets_file))
    # NOTE: both paths are interpolated unquoted into a shell command line, so
    # paths containing spaces or shell metacharacters will not work as-is.
    tmc_cli = 'full_path_to_tmc_wrapper/TMC/treeFromTriplets -fid {} -frtN {} -w 1 -index 2'.format(
        triplets_file, output_newick)
    return os.system(tmc_cli)

In [7]:
# TMC writes its tree to this path; run_TMC passes it as the -frtN argument.
index_labeled_output_newick = '/tmp/index_labeled_triplets_test.newick'
# Left as a bare expression so the notebook echoes the status returned by
# run_TMC (the os.system result); a non-zero value means the command failed.
run_TMC(triplets_file=triplets_file, output_newick=index_labeled_output_newick)

256

In [8]:
# Path of the final Newick file: it is the second argument given to
# convert_names_in_sagis_newick and is later loaded with dendropy.
newick_tree_path = '/tmp/triplets_test.newick'

In [9]:
# NOTE(review): presumably rewrites the index labels in TMC's output back to
# the original cell ids using cell_id_map_for_sagi, writing the result to
# newick_tree_path -- confirm against tmc_wrapper.TMC_formatting.
convert_names_in_sagis_newick(
    index_labeled_output_newick,
    newick_tree_path,
    cell_id_map_for_sagi,
)

In [10]:
import dendropy
# Parse the Newick file at newick_tree_path into a DendroPy tree.
rec_tree = dendropy.Tree.get_from_path(newick_tree_path, "newick")

In [11]:
# Print a plain-text (ASCII) drawing of the reconstructed tree.
print(rec_tree.as_ascii_plot())

/---------------------------------------------------------------- SM563
|                                                                      
|---------------------------------------------------------------- SM300
|                                                                      
|---------------------------------------------------------------- NR472
|                                                                      
|---------------------------------------------------------------- KT68 
|                                                                      
|---------------------------------------------------------------- KT222
|                                                                      
|---------------------------------------------------------------- DG238
|                                                                      
|/--------------------------------------------------------------- PC211
||                                                              