In [1]:
import matplotlib.pyplot as plt
import networkx as nx
import random as rand
import numpy as np
import scipy
import prior_calc
import posterior_calc
import collections
from networkx.algorithms.traversal.depth_first_search import dfs_tree
import tree_generation
import data_reader
import generate_test_data
import tree_comparison

In [2]:
# Load the real single-cell mutation data (Kim/Simon dataset).

# Passed to both the prior and the posterior calculator in later cells.
num_cells = 58

# Parse the spreadsheet, then keep only the 18 mutations described in the paper.
mutation_data = data_reader.data_reader('data_mmc2.xlsx')
target = [
    'PDE4DIP(A->G)',
    'NTRK1(A->G)',
    'SESN2(C->T)',
    'ARHGAP5(G->A)',
    'DNAJC17(C->G)',
    'USP32(C->T)',
    'ANAPC1(G->A)',
    'RETSAT(C->T)',
    'ST13(G->A)',
    'DLEC1(T->C)',
    'FRG1(G->A)',
    'DMXL1(G->A)',
    'FAM115C(T->C)',
    'MLL3(C->T)',
    'ABCB5(G->T)',
    'ASNS(T->A)',
    'PABPC1(C->T)',
    'TOP1MT(A->G)',
]

# Ordered mapping: mutation label -> result of get_gene_mutations() for that
# label, inserted in the same order as `target`.
important_gene = collections.OrderedDict()
for gene in target:
    important_gene[gene] = mutation_data.get_gene_mutations(gene)

# `data` is the name the posterior cell reads; it aliases the same OrderedDict.
data = important_gene



In [52]:
# This cell evaluates all the priors without error probabilities.

# `reload` is a builtin only on Python 2; on Python 3 it lives in importlib.
# Resolve it here so the cell runs under either kernel instead of raising
# NameError on Python 3.
try:
    reload
except NameError:
    from importlib import reload

# Re-import prior_calc so edits to the module on disk are picked up without
# restarting the kernel.
reload(prior_calc)

# NOTE(review): the meaning of the positional 0.95 and of Btree/Bmut is defined
# inside prior_calc and is not visible from this notebook -- document here once
# confirmed.
# NOTE(review): simulate() looks like a stochastic step and no RNG seed is set
# anywhere in the visible cells -- confirm, and seed if the result below must be
# reproducible across runs.
priors_pure = prior_calc.prior_calculator(num_cells, .95, Btree=100, Bmut=10000)
priors_pure.simulate()
priors_pure.compute_final_probs()
prior_probabilities_pure = priors_pure.return_priors()

In [49]:
# Calculate posteriors and construct the tree.
#
# Inputs come from earlier cells: `data` (ordered mapping of the selected
# mutations), `prior_probabilities_pure` (output of the prior calculator) and
# `num_cells`. The calls below must run in this order: likelihood first, then
# posteriors, then the accessor that hands the result back.
posteriors = posterior_calc.posterior_calculator(data, prior_probabilities_pure, num_cells)
posteriors.calculate_likelihood()
posteriors.calculate_posteriors()
posterior_probabilities = posteriors.return_posteriors()
# Build the tree from the posteriors; `inferred_tree` is what the plotting and
# comparison cells consume.
tree_generator = tree_generation.tree_generation(posterior_probabilities)
inferred_tree = tree_generator.get_min_tree()

In [50]:
# Draw the inferred tree with a force-directed layout.
# Text alternative, kept for debugging:
#tree_generator.print_tree(inferred_tree)
G = inferred_tree
pos = nx.spring_layout(G,scale=1)
# The font size has to be given to the label call itself. Passing it to
# nx.draw() is unreliable: depending on the networkx version, draw() either
# ignores it (labels off by default) or renders a second, overlapping set of
# labels on top of the ones drawn here.
labels = nx.draw_networkx_labels(G, pos, font_size=8)
# Labels are already drawn above, so explicitly disable them here to get the
# same picture on every networkx version.
nx.draw(G, pos, with_labels=False)
plt.show()

In [134]:
# Kim's tree: the reference mutation order, written as (parent, child) edges.
# Edges are listed in the order they are inserted into the graph.
kim_edges = [
    ('DLEC1(T->C)', 'USP32(C->T)'),
    ('DLEC1(T->C)', 'DNAJC17(C->G)'),
    ('DLEC1(T->C)', 'ABCB5(G->T)'),
    ('DNAJC17(C->G)', 'ST13(G->A)'),
    ('DNAJC17(C->G)', 'NTRK1(A->G)'),
    ('NTRK1(A->G)', 'DMXL1(G->A)'),
    ('DMXL1(G->A)', 'TOP1MT(A->G)'),
    ('TOP1MT(A->G)', 'SESN2(C->T)'),
    ('SESN2(C->T)', 'ASNS(T->A)'),
    ('SESN2(C->T)', 'MLL3(C->T)'),
    ('SESN2(C->T)', 'FRG1(G->A)'),
    ('SESN2(C->T)', 'PABPC1(C->T)'),
    ('SESN2(C->T)', 'FAM115C(T->C)'),
    ('SESN2(C->T)', 'PDE4DIP(A->G)'),
    ('SESN2(C->T)', 'ANAPC1(G->A)'),
    ('SESN2(C->T)', 'ARHGAP5(G->A)'),
    ('SESN2(C->T)', 'RETSAT(C->T)'),
]
kim_tree = nx.DiGraph()
kim_tree.add_edges_from(kim_edges)

# Last expression of the cell, so its value is shown as the cell output:
# the comparison of the reference tree against the inferred one.
tree_comparison.compare_mutation_order(kim_tree, inferred_tree)


0.64052287581699341