# Simulate mutation gain and loss as a markov chain

Based off methodology described in [Pascoe et al](https://sfamjournals.onlinelibrary.wiley.com/doi/10.1111/1462-2920.13051) and [Sheppard et al](https://www.pnas.org/doi/10.1073/pnas.1305559110)

In this notebook, we will construct a null distribution for the enrichment scores calculate in `calculate-enrichment-scores-across-tree`. To do so, we want to simulate the gain and loss of mutations as a random process that occurs across the tree. We will do this following the methodology outlined in Pascoe et al. Currently, the way I am doing this is as follows: 

1. Starting with the tree in JSON format, make a copy of the tree object. 
2. On that copy, delete all existing mutations. Retain the tree's structuree (branch lengths, topology, tips) 
3. On that empty tree, traverse from root to tip. At each branch, evaluate whether to randomly add a mutation or not. This is evaluated probablistically as: `probability no mutation = (1.0+exp(-2.0*branch_length*rate))/2.0`, where `rate = 1/total_tree_branch_length`. This is exactly the same as what was described by Sheppard et al and is in his code. 
4. Perform a random draw of a number between 0 and 1. If that number is less than `probability no mutation`, do not mutate. If greater than, do mutate. 
5. If mutate, determine whether this is a gain of mutation or loss of mutation. To do this, we need to find the current state. To do so, traverse back up the tree to find the most recent ancestral node at which there is a mutation. If there is none, then our current state is unmutated, and this event is a gain eventt. If the current state is mutated, then this event will be a loss event. 
4. Add the proper mutation. Either a wild type to mutant (W1M) or mutant to wild type (M1W). Add this as an amino acid mutation on the tree. 
5. Return the simulated tree. Calculate enrichment score as: 

|host|presence|absence|
|:------|:-------|:------|
|host 1|A|B| 
|host 2|C|D|

where A, B, C, and D are counts of the mutation's presence and absence in host 1 and host 2. The odds ratio is then calculated as: `OR = (A * D)/(B * C)`

In [27]:
import glob, json
import re,copy, imp
import pandas as pd 
import numpy as np
from io import StringIO
import time
from Bio import SeqIO
from Bio.Seq import Seq

# for this to work, you will need to download the most recent version of baltic, available here 
bt = imp.load_source('baltic', '../baltic/baltic/baltic-modified-for-muts.py')

## Deal with posterior

In [28]:
def get_taxa_lines(tree_path):    

    lines_to_write = ""
    with open(tree_path, 'rU') as infile:
        for line in infile: ## iterate through each line
            if 'state' not in line.lower(): #going to grab all the interesting stuff in the .trees file prior to the newick tree strings
                lines_to_write = lines_to_write + line

    return(lines_to_write)

In [29]:
def convert_strain_to_number(taxa_lines):
    
    output_dict = {}
    
    translation_block = taxa_lines.split("Translate\n")[1]
    translation_list = translation_block.replace("\t","").split("\n")
    
    for t in translation_list: 
        information = t.lstrip().replace(",","")  # remove leading white spaces and commas
        
        if len(information.split(" ")) == 2:
            numeric_id = information.split(" ")[0]
            strain_name = information.split(" ")[1]
        
            output_dict[numeric_id] = strain_name
            
        else: 
            pass
        
    return(output_dict)

In [30]:
def convert_leaves_to_strains(input_leaves, strains_dict): 
    output_list = []
    
    for l in input_leaves: 
        strain_name = strains_dict[l]
        output_list.append(strain_name)
        
    return(output_list)

In [31]:
def get_burnin_value(tree_path, burnin_percent):
    with open(tree_path, 'rU') as infile:
        numtrees = 0
        for line in infile: ## iterate through each line
            if 'state' in line.lower(): #going to grab all the interesting stuff in the .trees file prior to the newick tree strings
                numtrees += 1
    
    burnin = numtrees * burnin_percent
    return(burnin)

# Translations and ancestral sequence reconstruction

In [32]:
def read_alignment(alignment_file):
    alignment_dict = {}

    for seq in SeqIO.parse(alignment_file, "fasta"):
        seqName = seq.description 
        sequence = str(seq.seq)
        alignment_dict[seqName] = sequence
        
    return(alignment_dict)

In [33]:
def return_cds_coordinates(genbank_ref_file):
    
    from Bio import GenBank
    with open(genbank_ref_file) as handle:
        for record in GenBank.parse(handle):

            # pull out the CDS feature; the gene coordinatees are in the feature.location. Get help with help(feauture)
            for f in record.features:
                if f.key == "CDS":
                    cds_start = int(f.location.split("..")[0])
                    cds_stop = int(f.location.split("..")[1])
                
    return(cds_start, cds_stop)

In [34]:
def return_mutations_on_branch(branch):
    if branch == None: 
        mutations = []
    elif "mutations" in branch.traits: 
        mutations = branch.traits["mutations"].split(",")
    else:
        mutations = []
    
    return(mutations)

In [35]:
def return_mutated_sequence(sequence, muts, cds_start, cds_stop):
    # make into a list because strings are immutable, while list are not
    mutated_sequence = list(sequence)
    
    for m in muts:
        site = int(m[1:-1])-1   # -1 is because of 0 indexing
        ancestral_nt = m[0]
        mutated_nt = m[-1]
        
        # since we are going backwards up the tree, we are reconstructing the ancestral sequence
        mutated_sequence[site] = ancestral_nt
    
    mutated_sequence = "".join(mutated_sequence)
    mutated_aa_sequence, aa_muts = return_mutated_aa_sequence(sequence, mutated_sequence, cds_start, cds_stop)
    # return a string
    return(mutated_sequence, mutated_aa_sequence, aa_muts)

In [36]:
def return_aa_sequence(sequence, cds_start, cds_stop):
    
    ha_cds = str(sequence)[cds_start-1:cds_stop-1]    # slice string based on cds coordinates
    ha_cds_seq = Seq(ha_cds)    # make it a Seq object
    translation = ha_cds_seq.translate()
    
    return(str(translation))

In [37]:
def return_mutated_aa_sequence(sequence, mutated_sequence, cds_start, cds_stop):
    
    ha_cds = str(sequence)[cds_start-1:cds_stop-1]    # slice string based on cds coordinates
    ha_cds_seq = Seq(ha_cds)    # make it a Seq object
    translation = ha_cds_seq.translate()
    mutated_translation = str(Seq(str(mutated_sequence)[cds_start-1:cds_stop-1]).translate())  # same as above but on 1 line
    
    aa_muts = []
    for i in range(len(translation)):
        if mutated_translation[i] != translation[i]:
            aa_mut = mutated_translation[i] + str(i+1) + translation[i]
            aa_muts.append(aa_mut)

    return(mutated_translation, aa_muts)

In [38]:
def return_all_parents(k, parents_dict, sequence, cds_start, cds_stop):
    mutations = return_mutations_on_branch(k)
    
    # if at root
    if k.parent == None:
        return(parents_dict)
    
    # if not at root yet
    elif k.branchType == "leaf":
        
        # do something else here....we've already recorded the mutations and stuff so we should just go up one
        parents_dict = return_all_parents(k.parent, parents_dict, sequence, cds_start, cds_stop)
    
    else:
        sequence, aa_sequence, aa_muts = return_mutated_sequence(sequence, mutations, cds_start, cds_stop)
        parents_dict[k] = {"nt_muts": mutations, "sequence":sequence, "aa_sequence":aa_sequence, 
                                  "aa_muts":aa_muts}
        parents_dict = return_all_parents(k.parent, parents_dict, sequence, cds_start, cds_stop)
        
    return(parents_dict)

In [39]:
def return_sequence_map(tree, alignment_dict, cds_start, cds_stop):
    
    all_nodes = {}

    for k in tree.Objects: 
        if k.branchType == "leaf":
            sequence = alignment_dict[k.name]
            aa_sequence = return_aa_sequence(sequence, cds_start, cds_stop)
            mutations = return_mutations_on_branch(k)
            mutated_sequence, mutated_aa_sequence, aa_muts = return_mutated_sequence(sequence, mutations, cds_start, cds_stop)
            all_nodes[k] = {"muts":mutations, "aa_muts":aa_muts, "leaves":"NA"}
            
            # parents dict will include all parental nodes from the tip back to the root with their mutations, 
            # nucleotide sequences, and names as 'branchName':{'nt_muts':[list of nt muts], 'sequence': str(nt sequence)}
            parents_dict = {}
            parents_dict = return_all_parents(k, parents_dict, sequence, cds_start, cds_stop)
            
            # make a master list of internal nodes we've already inferred to not repeat work
            for p in parents_dict:
                leaves = p.leaves
                sequence = parents_dict[p]['sequence']
                aa_sequence = parents_dict[p]['aa_sequence']
                aa_muts = parents_dict[p]['aa_muts']
                muts = parents_dict[p]['nt_muts']
                
                # check to see if the name matches and if the leaves match; sometimes baltic assigns the same numeric
                # name to 2 different nodes!
                if p in all_nodes:
                    if all_nodes[p]['leaves'] == leaves:
                        pass
                    else:
                        all_nodes[p] = {"muts":muts, "aa_muts":aa_muts, "leaves":leaves}
                else: 
                    all_nodes[p] = {"muts":muts, "aa_muts":aa_muts, "leaves":leaves}

    return(all_nodes)

In [58]:
"""this function does 3 things: 1. for each branch, it records the branch name and its branch length in a 
dictionary; 2. it adds up the total branch length on the tree. For the beast trees, we only have branch lengths 
in time. However, we can get a reasonable branch length (and I think this is perfectly reasonable for this purpose)
by just summing the total mutations on the branch and dividing by the total number of sites. The only purpose 
in this analysis for the total tree branch length is to get an idea of the number of mutations that should 
occur across the tree. So this should work.  3. Gather all mutations across the tree."""

def return_total_tree_branch_length(tree, n_sites_alignment, sequence_map):
    total_branch_length = 0
    branch_lengths = {}
    all_nt_muts = []
    all_aa_muts = []
    
    for k in tree.Objects:
        muts_on_branch = sequence_map[k]['muts']
        aa_muts_on_branch = sequence_map[k]['aa_muts']
        
        all_nt_muts.extend(muts_on_branch)
        all_aa_muts.extend(aa_muts_on_branch)
        
        branch_length_time = k.length
        branch_length_divergence = len(muts_on_branch)/n_sites_alignment
                
        total_branch_length += branch_length_divergence
        branch_lengths[k] = branch_length_divergence
    
    all_nt_muts = list(set(all_nt_muts))
    all_aa_muts = list(set(all_aa_muts))

    return(total_branch_length, branch_lengths, all_nt_muts, all_aa_muts)

# Simulations

In [41]:
def simulate_gain_loss(branch_length, total_tree_branch_length):
    from math import exp
    import random
    
    rate = 1/total_tree_branch_length
    probability_stay_same = (1.0+exp(-2.0*branch_length*rate))/2.0
    #probability_stay_same = ((1.0+exp(-2.0*branch_length*rate))/2.0)*2500  # does it matter if I say you have to hit the right site? 
    
    # pick a random number between 0 and 1.0
    value = random.random()
    
    if value < probability_stay_same:
        mutation = 0   # don't mutate
    else:
        mutation = 1
        
    return(mutation)

In [69]:
"""this function deletes all amino acid mutations from the tree"""

def return_no_muts_tree(tree):
    
    # we need to make a copy, otherwise this will alter the no muts tree
    
    for k in tree.Objects:
                
        if 'mutations' not in k.traits:
            k.traits['mutations'] = []        
        else:
            k.traits['mutations'] = []
            
            
        if 'mutations' not in k.parent.traits:
            k.parent.traits['mutations'] = [] 
        else:
            k.parent.traits['mutations'] = []

    
    return(tree)

In [70]:
def return_most_recent_mutated_node(node):
    """given an internal node, traverse back up the tree to find a parental node that has a mutation annotation.
    if you get to the root without finding a mutation, return root. This is necessary for determining the proper 
    starting state for the mutation you are adding"""
    
    if node.traits['mutations'] == []:
        
        if node.parent !=None:
            parent_node = return_most_recent_mutated_node(node.parent)
        
        else:
            #print("root is proper parent")
            parent_node = node
    
    else: 
        #print("current node has proper length")
        parent_node = node
    
    return(parent_node)

In [44]:
# need to figure out a way to do branch length below without using the mutations

In [106]:
def simulate_gain_loss_as_markov_chain(tree, sequence_map, n_sites_alignment, total_tree_branch_length, branches_that_mutated):
    
    # my fake mutation is going to be W1M for wild-type 1 mutant
    for k in tree.Objects:
        
        # collect branch length
        muts_on_branch = sequence_map[k]['muts']
        print(muts_on_branch)
        branch_length = len(muts_on_branch)/n_sites_alignment

        # if this happens at the root, set parent divergence to 0
#         if 'muts'k.parent.traits == {}:
#             parent_div = 0
#         elif 'node_attrs' not in k.parent.traits:
#             parent_div = 0
#         else:
#             parent_div = k.parent.traits['node_attrs']['div']

#         branch_length = divergence - parent_div
        
        # find the most recent mutated parent (this could be the parent node, grandparent, etc...)
        most_recent_mutated_parent = return_most_recent_mutated_node(k.parent)
        
        if most_recent_mutated_parent not in sequence_map: 
            #print(most_recent_mutated_parent, "not in sequence map")
            parent_mut_state = []
        else:
            parent_mut_state = sequence_map[most_recent_mutated_parent]['muts']
        
        """given the length of the current branch and the total tree branch length, perform a random draw to 
        decide whether to mutate. A result of 1 means mutate, 0 means do not mutate""" 
        print(branch_length, total_tree_branch_length)
        mutation = simulate_gain_loss(branch_length, total_tree_branch_length)
        print(mutation)        
        
        if mutation == 1:  # if we've mutated
            
            # add branch to dictionary for plotting later 
            if k.name in branches_that_mutated:
                branches_that_mutated[k.name]["times_mutated"] += 1
            else:
                branches_that_mutated[k.name] = {"branch_length":branch_length, "times_mutated":1}
            
            #print("we are mutating branch ", k)
            if parent_mut_state == [] or parent_mut_state == ['M1W']:
                k.traits['mutations'] = ['W1M']
            elif parent_mut_state == ['W1M']:
                k.traits['mutations'] = ['M1W']
                
    return(tree, branches_that_mutated)

In [None]:
sim_tree, branches_that_mutated = simulate_over_posterior(all_trees, burnin, n_sites_alignment, genbank_ref_file, alignment)

In [99]:
def simulate_over_posterior(all_trees, burnin, n_sites_alignment, genbank_ref_file, alignment):
    start_time = time.time()
    
    cds_start, cds_stop = return_cds_coordinates(genbank_ref_file)
    alignment_dict = read_alignment(alignment)

    with open(all_trees, "r") as infile:
        
        taxa_lines = get_taxa_lines(all_trees)
        strains_dict = convert_strain_to_number(taxa_lines)

        tree_counter = 0

        for line in infile:
            if 'tree STATE_' in line:
                tree_counter += 1
                
                if tree_counter >= burnin:
                    print(tree_counter)
                    temp_tree = StringIO(taxa_lines + line)
                    tree = bt.loadNexus(temp_tree)
                    
                    # generate the sequence map, which maps for each branch the mutations, aa muts, and sequences
                    sequence_map = return_sequence_map(tree, alignment_dict, cds_start, cds_stop)
                    
                    # generate a no muts tree
                    no_muts_tree = return_no_muts_tree(tree)
                                        
                    # calculate total tree branch length
                    total_tree_branch_length, branch_lengths, nt_muts, aa_muts = return_total_tree_branch_length(tree, n_sites_alignment, sequence_map)
                    
                    # simulate across tree
                    branches_that_mutated = {}
                    sim_tree, branches_that_mutated = simulate_gain_loss_as_markov_chain(no_muts_tree, sequence_map, n_sites_alignment, total_tree_branch_length, branches_that_mutated)
    
    # print the amount of time this took
    total_time_seconds = time.time() - start_time
    total_time_minutes = total_time_seconds/60
    print("this took", total_time_seconds, "seconds (", total_time_minutes," minutes) to run on", tree_counter, "trees")
    return(sim_tree, branches_that_mutated)

In [100]:
n_sites_alignment = 1762
min_required_count = 1
burnin_percent = 0.999

alignment = "../../h5n1-host-classification/beast/alignments/aligned_h5n1_ha-3deme-1per-country-month-host-downsampled-bad-dates-2021-06-09-with-annotations-2021-07-06.fasta"
genbank_ref_file = "../test-data/reference_h5n1_ha.gb"
all_trees = "../../h5n1-host-classification/beast/beast-runs/2022-04-19-mascot-3deme-skyline-fixed-muts-logger/it3/2022-04-19-mascot-3deme-skyline-tipdates.muts.trees"


taxa_lines = get_taxa_lines(all_trees)
burnin = get_burnin_value(all_trees, burnin_percent)
print(burnin)

433.566


  after removing the cwd from sys.path.
  


In [107]:
sim_tree, branches_that_mutated = simulate_over_posterior(all_trees, burnin, n_sites_alignment, genbank_ref_file, alignment)

  after removing the cwd from sys.path.


434
[]
0.0 3.150397275822912
0
['C72T', 'T138C', 'T392C', 'T482A', 'C1171A', 'A1228G', 'C1341T']
0.003972758229284903 3.150397275822912
0
['C957T']
0.0005675368898978433 3.150397275822912
0
['T10C', 'A202G', 'C219A', 'C392T', 'T447A', 'G488A', 'C660T', 'G684A', 'T699C', 'G703A', 'C718T', 'G1278A']
0.00681044267877412 3.150397275822912
0
['A9C', 'T264A', 'T483G', 'C699A', 'C783T', 'G1287A', 'T1464C']
0.003972758229284903 3.150397275822912
0
['A672G', 'T789C', 'A1017G', 'C1028G', 'A1517G', 'G1602A']
0.00340522133938706 3.150397275822912
0
['C9T', 'G320A', 'G486A', 'G555A', 'A635G', 'A744G', 'G771A', 'T783A', 'A1158G', 'C1566A']
0.0056753688989784334 3.150397275822912
0
['G225A', 'G349A', 'A439G', 'C1561T', 'A1668G']
0.0028376844494892167 3.150397275822912
0
['T57C', 'A219G', 'G533A', 'G750T', 'T874G', 'G998A', 'C1029T', 'A1701G']
0.004540295119182747 3.150397275822912
0
['C43T', 'T69C', 'G75T', 'G202A', 'G227A', 'T243A', 'G291A', 'G309A', 'C370T', 'T411A', 'A427G', 'C455T', 'A456G', 'A52

['C37T', 'T87C', 'T144C', 'A374G', 'T474C', 'T519C', 'C569T', 'T612C', 'C682T', 'G732A', 'T816C', 'G843A', 'C888A', 'C948T', 'C966T', 'G1104A', 'A1113T', 'C1254A', 'C1695T']
0.010783200908059024 3.150397275822912
0
['G1052A']
0.0005675368898978433 3.150397275822912
0
['C465T', 'C585T', 'T801C', 'T957C', 'A994G', 'G1065A', 'C1137T', 'C1197T', 'A1257T', 'T1272C', 'G1281A', 'G1308A', 'A1426C', 'A1452G', 'A1492G', 'T1503C', 'G1549A', 'A1605G', 'G1638T', 'T1652C', 'A1666G', 'T1680C']
0.012485811577752554 3.150397275822912
0
['G427A', 'C789T', 'A1495G']
0.00170261066969353 3.150397275822912
0
['T42G', 'C1134T', 'G1427A', 'T1503C', 'C1607T']
0.0028376844494892167 3.150397275822912
0
['T43C', 'C120T', 'C195T', 'A276G', 'T351C', 'A488C', 'G510A', 'A855G', 'A913G', 'G1028A', 'C1031T', 'A1034G', 'G1305A', 'A1426G', 'T1467C', 'G1559A', 'T1644C']
0.009648127128263337 3.150397275822912
0
['A188G', 'T669C', 'G1152A', 'T1725C']
0.0022701475595913734 3.150397275822912
0
['T1029C', 'C1435T']
0.001135073

['G111A', 'A556G', 'T603C', 'C963A', 'A1035G', 'T1272C', 'G1498A', 'A1707G']
0.004540295119182747 3.150397275822912
0
['T50C', 'A294G', 'A316G', 'C378T', 'T382C', 'G635A', 'G774A', 'G921T', 'C1464T']
0.00510783200908059 3.150397275822912
0
['T63C', 'A177T', 'A345G', 'C420T', 'A513G', 'C1473T']
0.00340522133938706 3.150397275822912
0
['G987A', 'A1260G', 'C1533T']
0.00170261066969353 3.150397275822912
0
['A1584G']
0.0005675368898978433 3.150397275822912
0
[]
0.0 3.150397275822912
0
['A533G']
0.0005675368898978433 3.150397275822912
0
['A201G', 'C318A', 'A1053G']
0.00170261066969353 3.150397275822912
0
['T1745C']
0.0005675368898978433 3.150397275822912
0
['G202A', 'G225A', 'C687T', 'G709T', 'A912G', 'G1092A']
0.00340522133938706 3.150397275822912
0
['T243C', 'A350C']
0.0011350737797956867 3.150397275822912
0
['G168A', 'A502C']
0.0011350737797956867 3.150397275822912
0
['A175G', 'T595A', 'G1593A']
0.00170261066969353 3.150397275822912
0
['A284G', 'C1131T', 'A1518G']
0.00170261066969353 3.15

0
['A1605G']
0.0005675368898978433 3.150397275822912
0
[]
0.0 3.150397275822912
0
['C966A']
0.0005675368898978433 3.150397275822912
0
['G724A']
0.0005675368898978433 3.150397275822912
0
['G208A', 'C682T', 'A828G']
0.00170261066969353 3.150397275822912
0
[]
0.0 3.150397275822912
0
['C141A', 'C573T', 'C966A', 'C1143T']
0.0022701475595913734 3.150397275822912
0
['C183T', 'A1278G', 'T1443C']
0.00170261066969353 3.150397275822912
0
['G174A', 'G288A']
0.0011350737797956867 3.150397275822912
0
[]
0.0 3.150397275822912
0
[]
0.0 3.150397275822912
0
[]
0.0 3.150397275822912
0
['A426G', 'G636A', 'A1701G']
0.00170261066969353 3.150397275822912
0
[]
0.0 3.150397275822912
0
['A67G']
0.0005675368898978433 3.150397275822912
0
[]
0.0 3.150397275822912
0
['C1215G', 'A1756C']
0.0011350737797956867 3.150397275822912
0
[]
0.0 3.150397275822912
0
['T53C', 'C199T', 'G529A', 'A532G', 'C592T', 'A878G', 'C963T', 'C1425T', 'T1725C']
0.00510783200908059 3.150397275822912
0
['C326T', 'T474C', 'A601G', 'A1107G', 'G

0
['G292A', 'C379T', 'T585G', 'C648A']
0.0022701475595913734 3.150397275822912
0
['A1050G', 'G1737A']
0.0011350737797956867 3.150397275822912
0
['C255T', 'C261T', 'A488G', 'C540T', 'G697A', 'T825C', 'A1095G', 'G1146A', 'C1521T']
0.00510783200908059 3.150397275822912
0
['A661G', 'T861C']
0.0011350737797956867 3.150397275822912
0
['G105A', 'A768G', 'T1380C']
0.00170261066969353 3.150397275822912
0
['G616A', 'G1308A', 'T1461C']
0.00170261066969353 3.150397275822912
0
['C192T', 'A613G', 'C753T', 'C789T', 'C1206T', 'T1242C', 'C1275T']
0.003972758229284903 3.150397275822912
0
['C756T']
0.0005675368898978433 3.150397275822912
0
['G105A', 'A535T', 'G627A']
0.00170261066969353 3.150397275822912
0
[]
0.0 3.150397275822912
0
['G428A']
0.0005675368898978433 3.150397275822912
0
['A504G', 'G533A', 'T642C', 'T660C', 'G1104A', 'G1158A', 'G1195A', 'T1242C', 'C1464T']
0.00510783200908059 3.150397275822912
0
['C10T', 'G225A', 'T534C', 'G1308A']
0.0022701475595913734 3.150397275822912
0
['T9A', 'C318T', '

In [1]:
def perform_simulations(input_tree, gene, iterations, total_tree_branch_length, host1, host2, min_required_count, method, host_counts):
    
    no_muts_tree = return_no_muts_tree(input_tree, gene)

    scores_all = []
    times_detected_all = {}
    branches_that_mutated = {}
    
    for i in range(iterations):
        sim_tree, branches_that_mutated = simulate_gain_loss_as_markov_chain(no_muts_tree, total_tree_branch_length, branches_that_mutated)
        scores, scores_dict, times_detected_dict, branch_lengths_dict, host_counts_dict2 = calculate_enrichment_scores(sim_tree, ['W1M'],['W1M'], host1, host2, min_required_count, method, host_counts)
        scores_all.extend(scores)
        times_detected_all[i] = times_detected_dict

    return(scores_all, times_detected_all, branches_that_mutated)