In [1]:
#Imports

import matplotlib.pyplot as plt
import networkx as nx
import random as rand
import numpy as np
import scipy
import prior_calc
import posterior_calc
import collections
from networkx.algorithms.traversal.depth_first_search import dfs_tree
import tree_generation
import data_reader
import generate_test_data
import tree_comparison

In [2]:
# Re-load generate_test_data (Python 2 builtin `reload`) so the code below uses
# the module as it currently exists on disk rather than the copy imported earlier.
reload(generate_test_data)
def perform_experiment(num_cells, num_muts, alpha_value, prior_probs):
    """Run exactly one simulate / infer / compare experiment and return its score.

    The priors are assumed to have been calculated already for the given
    number of cells and alpha value; they are passed in through
    ``prior_probs`` and are not recomputed here.

    Steps:
      1. Generate a dataset with a "true" tree, along with the ambiguity
         information for that tree.
      2. Construct the posteriors and, from them, the inferred tree.
      3. Calculate how similar the two trees are, taking the ambiguities
         into account.

    Args:
        num_cells: number of cells, passed to both the data generator and
            the posterior calculator.
        num_muts: number of mutations, passed to the data generator.
        alpha_value: forwarded to the data generator as its ``alpha`` argument.
        prior_probs: precomputed priors, passed to the posterior calculator.

    Returns:
        The result of ``tree_comparison.compare_mutation_order`` for the true
        tree, the inferred tree and the true tree's ambiguities.
    """
    # Step 1: simulate "fake" data. The generator is stateful, so the call
    # order below is kept exactly as is.
    simulator = generate_test_data.data_generator(num_cells, num_muts, alpha=alpha_value)
    simulator.initialize_Tk()
    simulator.create_lineage_tree()
    simulator.apply_mutations()
    genotypes = simulator.return_genotype_data()
    reference_tree = simulator.construct_true_mut_tree()  # ground truth for the comparison
    reference_ambiguities = simulator.get_ambiguities()  # ambiguities inherent in the true tree

    # Step 2: posteriors first, then the tree built from them.
    calculator = posterior_calc.posterior_calculator(genotypes, prior_probs, num_cells)
    calculator.calculate_likelihood()
    calculator.calculate_posteriors()
    tree_builder = tree_generation.tree_generation(calculator.return_posteriors())
    candidate_tree = tree_builder.get_min_tree()

    # Step 3: similarity score between the true tree and the inferred tree.
    return tree_comparison.compare_mutation_order(reference_tree, candidate_tree, reference_ambiguities)
    
    

In [4]:
# METADATA
# Defaults used by the single-experiment cells further down. The sweep below
# iterates with its own lower-case loop variables, so these values are no
# longer overwritten by the last sweep iteration.
NUM_CELLS = 6
ALPHA = 0.3
NUM_MUTS = 4

# Parameter grid for the sweep and the number of repetitions per grid point.
SWEEP_ALPHAS = [0.3, 0.5, 0.7, 0.9]
SWEEP_NUM_CELLS = [5, 10, 25, 50]
SWEEP_NUM_MUTS = [3, 6, 12, 19]
NUM_TRIALS = 1000

for alpha in SWEEP_ALPHAS:
    for num_cells in SWEEP_NUM_CELLS:
        # Evaluate the priors without error probabilities. They are built from
        # (num_cells, alpha) only, so they are computed once per pair and
        # reused for every mutation count below.
        sweep_calculator = prior_calc.prior_calculator(num_cells, alpha, Btree=100, Bmut=10000)
        sweep_calculator.simulate()
        sweep_calculator.compute_final_probs()
        sweep_priors = sweep_calculator.return_priors()

        for num_muts in SWEEP_NUM_MUTS:
            # Average the similarity score over NUM_TRIALS independent experiments.
            total_score = 0
            for trial in range(NUM_TRIALS):
                total_score += perform_experiment(num_cells, num_muts, alpha, sweep_priors)
            # float() keeps this a true division under Python 2 as well.
            mean_score = total_score / float(NUM_TRIALS)
            # Single parenthesised argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('For ALPHA=' + str(alpha) + ', NUM_CELLS=' + str(num_cells) + ', NUM_MUTS=' + str(num_muts) + ', score =' + str(mean_score))
            
        

For ALPHA=0.3, NUM_CELLS=5, NUM_MUTS3, score =0.91
For ALPHA=0.3, NUM_CELLS=5, NUM_MUTS6, score =0.744333333333
For ALPHA=0.3, NUM_CELLS=5, NUM_MUTS12, score =0.688651515152
For ALPHA=0.3, NUM_CELLS=5, NUM_MUTS19, score =0.634385964912
For ALPHA=0.3, NUM_CELLS=10, NUM_MUTS3, score =0.827
For ALPHA=0.3, NUM_CELLS=10, NUM_MUTS6, score =0.699133333333
For ALPHA=0.3, NUM_CELLS=10, NUM_MUTS12, score =0.557803030303
For ALPHA=0.3, NUM_CELLS=10, NUM_MUTS19, score =0.470479532164
For ALPHA=0.3, NUM_CELLS=25, NUM_MUTS3, score =0.850333333333
For ALPHA=0.3, NUM_CELLS=25, NUM_MUTS6, score =0.699933333333
For ALPHA=0.3, NUM_CELLS=25, NUM_MUTS12, score =0.553666666667
For ALPHA=0.3, NUM_CELLS=25, NUM_MUTS19, score =0.478333333333
For ALPHA=0.3, NUM_CELLS=50, NUM_MUTS3, score =0.918666666667
For ALPHA=0.3, NUM_CELLS=50, NUM_MUTS6, score =0.764133333333
For ALPHA=0.3, NUM_CELLS=50, NUM_MUTS12, score =0.709575757576
For ALPHA=0.3, NUM_CELLS=50, NUM_MUTS19, score =0.65334502924
For ALPHA=0.5, NUM_CELLS

In [None]:
# This cell evaluates all the priors without error probabilities,
# using whatever values NUM_CELLS and ALPHA currently hold in the kernel.
# The result is stored in prior_probabilities_pure, which the
# single-experiment cell below passes to perform_experiment.
reload(prior_calc)  # re-load the module before using it (Python 2 builtin)
priors_pure = prior_calc.prior_calculator(NUM_CELLS, ALPHA, Btree=100, Bmut=10000)
priors_pure.simulate()
priors_pure.compute_final_probs()
prior_probabilities_pure = priors_pure.return_priors()

In [None]:
# Run one experiment with the current NUM_CELLS / NUM_MUTS / ALPHA settings and
# the priors in prior_probabilities_pure, then show the resulting similarity score.
score = perform_experiment(
    NUM_CELLS,
    NUM_MUTS,
    ALPHA,
    prior_probabilities_pure,
)
print(score)