Reconstructing a lineage tree from a mutation table

In [1]:
import os
import sys
sys.path.append('path_to_examples')

from utils import run_command

import pandas as pd

In [2]:
# Identifier of the dataset to process; it is used below to build both the
# project directory and the mutation-table file name.
name = 'SubC3_train_000X'

In [3]:
# Directory that holds this dataset's input and output files.
# NOTE(review): 'path_to_Subchallenge3' looks like a placeholder -- point it at
# the local Subchallenge3 data before running.
project_dir = 'path_to_Subchallenge3/{}'.format(name)

In [4]:
# Load the mutation table. The first CSV column becomes the row index; those
# index values are later written out as the FASTA record names.
df = pd.read_csv(os.path.join(project_dir,'{}_mutation_table.csv'.format(name)), index_col=0)

In [None]:
# Translate scar values to pseudo amino-acid characters

In [5]:
# Lookup table from scar values to single-letter pseudo amino-acid codes.

# The zero state (whether read as a number or as the string '0') maps to 'A'.
symbol_map = {0.0: 'A', '0': 'A'}

# Scar states 'A'..'R' are paired position by position with amino-acid letters.
symbol_map.update(zip('ABCDEFGHIJKLMNOPQR',
                      'RNDCQEGHILKMFPSTWY'))

# Skipping rare characters as we're running out of standard amino acids.
symbol_map.update(dict.fromkeys('STUV', '-'))

# Inherited deletion is treated as a character as it carries information
# (unlike acquisition dropout).
symbol_map['-'] = 'V'

In [6]:
# Translate every cell through symbol_map. Values that have no entry in the
# map come back as NaN and are then written as '-'.
# NOTE: df is overwritten in place, so re-running this cell without reloading
# df would translate the already-translated characters a second time.
for col in df.columns:
    df[col] = df[col].map(symbol_map)
df.fillna("-",inplace=True)

# NOTE: despite its name, this is the path of the FASTA file that is fed to
# FastTree; the name is kept because a later cell refers to it.
normalized_newick_path = os.path.join(project_dir,'ft2_data.fasta')

# newline='' turns off newline translation so the explicit '\r\n' terminators
# are written verbatim on every platform (in default text mode Windows would
# expand them to '\r\r\n').
with open(normalized_newick_path, 'w', newline='') as f:
    # One FASTA record per row: the header is the row index, the sequence is
    # the row's characters concatenated in column order.
    for sr, row in df.iterrows():
        f.write('>{}\r\n{}\r\n'.format(sr, ''.join(row.values)))

In [7]:
import dendropy

# Location of the FastTree executable.
# NOTE(review): "path_to_FastTree" looks like a placeholder -- set it to the
# local binary before running.
fasttree_path = "path_to_FastTree"

# Transition-matrix file handed to FastTree through its -trans option. The path
# is relative, so presumably it is resolved against the working directory the
# command runs in -- confirm.
fasttree_transition_table = 'unified_transition_table_for_fastree.mat'

def run_fasttree(newick_output_file, mutations_fasta_path):
    """Invoke FastTree on a FASTA file and load the tree it writes.

    The command line is assembled from the module-level ``fasttree_path`` and
    ``fasttree_transition_table`` settings and handed to ``run_command``
    (imported from ``utils``; presumably it executes the command and waits for
    it to finish -- confirm against that helper).

    Args:
        newick_output_file: Path given to FastTree's ``-out`` option; the same
            path is then read back as a Newick file.
        mutations_fasta_path: Path of the input FASTA file, passed as the
            final command-line argument.

    Returns:
        The ``dendropy.Tree`` parsed from ``newick_output_file``.
    """
    fasttree_command = [
        fasttree_path,
        '-pseudo',
        '-trans', fasttree_transition_table,
        '-out', newick_output_file,
        mutations_fasta_path,
    ]
    run_command(fasttree_command)
    return dendropy.Tree.get_from_path(newick_output_file, "newick")

In [8]:
# Reconstruct the tree: FastTree is asked to write FT2.newick into the project
# directory, and the result is loaded back as a dendropy tree.
newick_output_file = os.path.join(project_dir,'FT2.newick')
# normalized_newick_path is the FASTA file written in the previous step.
rec_tree = run_fasttree(newick_output_file, normalized_newick_path)

In [9]:
# Show the reconstructed tree as a plain-text (ASCII) diagram.
print(rec_tree.as_ascii_plot())

                                                             /--- LN232
                                            /----------------+         
                                            |                \--- LN65 
                                            |                          
                                            |      /------------- DG223
   /----------------------------------------+  /---+                   
   |                                        |  |   |  /---------- DG26 
   |                                        |  |   \--+                
   |                                        |  |      |   /------ DG513
   |                                        |  |      \---+            
   |                                        \--+          |  /--- DG45 
   |                                           |          \--+         
   |                                           |             \--- DG438
   |                                           |                