In [1]:
import pandas as pd
import random

from gcdyn import bdms, gpmap, mutators, responses, utils

In [2]:
# Locations of the MK_RS5NF model tables, relative to this notebook.
MK_RS5NF_mutability_path = "../gcdyn/data/MK_RS5NF_mutability.csv"
MK_RS5NF_substitution_path = "../gcdyn/data/MK_RS5NF_substitution.csv"

# Both tables use their first CSV column as the index.
substitution = pd.read_csv(MK_RS5NF_substitution_path, index_col=0)

# The mutability file is squeezed along its columns, so a single-column
# table is collapsed into a Series.
mutability = (
    pd.read_csv(MK_RS5NF_mutability_path, index_col=0)
    .squeeze("columns")
)

# Nucleotide sequence assigned to the root node of the simulated tree and
# passed as `naive=` when the leaf sequences are written to FASTA.
# NOTE(review): presumably two chains concatenated, with the second chain
# starting at `chain_2_start_idx` — confirm against the gcdyn data docs.
replay_naive = "GAGGTGCAGCTTCAGGAGTCAGGACCTAGCCTCGTGAAACCTTCTCAGACTCTGTCCCTCACCTGTTCTGTCACTGGCGACTCCATCACCAGTGGTTACTGGAACTGGATCCGGAAATTCCCAGGGAATAAACTTGAGTACATGGGGTACATAAGCTACAGTGGTAGCACTTACTACAATCCATCTCTCAAAAGTCGAATCTCCATCACTCGAGACACATCCAAGAACCAGTACTACCTGCAGTTGAATTCTGTGACTACTGAGGACACAGCCACATATTACTGTGCAAGGGACTTCGATGTCTGGGGCGCAGGGACCACGGTCACCGTCTCCTCAGACATTGTGATGACTCAGTCTCAAAAATTCATGTCCACATCAGTAGGAGACAGGGTCAGCGTCACCTGCAAGGCCAGTCAGAATGTGGGTACTAATGTAGCCTGGTATCAACAGAAACCAGGGCAATCTCCTAAAGCACTGATTTACTCGGCATCCTACAGGTACAGTGGAGTCCCTGATCGCTTCACAGGCAGTGGATCTGGGACAGATTTCACTCTCACCATCAGCAATGTGCAGTCTGAAGACTTGGCAGAGTATTTCTGTCAGCAATATAACAGCTATCCTCTCACGTTCGGCTCGGGGACTAAGCTAGAAATAAAA"

In [3]:
# Index handed to the context helper as the start of the second chain in
# the paired sequence.
chain_2_start_idx = 336


def replay_seq_to_contexts(seq):
    """Return the padded five-mer contexts of a paired replay sequence.

    Thin wrapper around
    ``utils.padded_fivemer_contexts_of_paired_sequences`` that fixes the
    chain boundary to the notebook-level ``chain_2_start_idx``.

    Args:
        seq: Sequence to split into contexts; forwarded unchanged.

    Returns:
        Whatever ``utils.padded_fivemer_contexts_of_paired_sequences``
        returns for ``seq`` and ``chain_2_start_idx``.
    """
    # chain_2_start_idx is looked up at call time, exactly as the original
    # lambda did, so rebinding it in a later cell still takes effect.
    return utils.padded_fivemer_contexts_of_paired_sequences(seq, chain_2_start_idx)

In [4]:
# Context-based mutator built from the loaded mutability and substitution
# tables; sequences are converted to contexts with replay_seq_to_contexts.
replay_mutator = mutators.ContextMutator(
    mutability=mutability,
    substitution=substitution,
    seq_to_contexts=replay_seq_to_contexts,
)

In [5]:
# Response object later passed as `mutation_rate` to tree.evolve; it is
# built from the same mutability table and context function as
# replay_mutator.
replay_mutation_response = responses.SequenceContextMutationResponse(
    mutability,
    replay_seq_to_contexts,
)

In [6]:
# Genotype-phenotype map used to set the root's `x` and inside the
# SequencePhenotypeMutator.
# NOTE(review): presumably maps every sequence to the constant 1.0 —
# confirm against gpmap.ConstantGPMap.
gp_map = gpmap.ConstantGPMap(1.0)

In [7]:
def make_replay_ish_tree(base_seed, max_attempts=1000):
    """Simulate a tree rooted at ``replay_naive``, retrying on failure.

    A root ``bdms.TreeNode`` is created with ``replay_naive`` as its
    sequence and ``gp_map(sequence)`` as its phenotype ``x``. The root is
    then evolved with a constant birth response of 0.35, the notebook-level
    ``replay_mutator`` / ``gp_map`` pair as mutator, and
    ``replay_mutation_response`` as mutation rate. Each attempt uses the
    seed ``base_seed + attempt``; an attempt that raises
    ``bdms.TreeError`` is reported with ``"try failed"`` and retried.

    Args:
        base_seed: Seed of the first attempt; attempt ``i`` (0-based) uses
            ``base_seed + i``.
        max_attempts: Maximum number of evolve attempts before giving up.

    Returns:
        The evolved root ``bdms.TreeNode`` from the first successful attempt.

    Raises:
        RuntimeError: If every attempt raised ``bdms.TreeError``. The
            original code returned ``None`` here, which only failed later
            with an unrelated error at the call site.
    """
    tree = bdms.TreeNode()
    tree.sequence = replay_naive
    tree.x = gp_map(tree.sequence)

    last_error = None
    for attempt in range(max_attempts):
        try:
            # NOTE(review): the same root object is reused across attempts,
            # as in the original; this assumes a failed evolve() leaves the
            # root reusable — confirm against bdms.TreeNode.evolve.
            tree.evolve(
                20,  # presumably the simulation end time — TODO confirm
                birth_rate=responses.ConstantResponse(0.35),
                mutator=mutators.SequencePhenotypeMutator(replay_mutator, gp_map),
                mutation_rate=replay_mutation_response,
                min_survivors=20,
                seed=base_seed + attempt,
            )
        except bdms.TreeError as err:
            print("try failed")
            last_error = err
        else:
            return tree

    # Fail loudly instead of implicitly returning None.
    raise RuntimeError(
        f"tree simulation failed on all {max_attempts} attempts "
        f"(seeds {base_seed}..{base_seed + max_attempts - 1})"
    ) from last_error

# Simulate with base seed 0 and report the number of tips before any
# sampling or pruning is applied.
tree = make_replay_ish_tree(0)
print("tip count: ", len(tree))

# Sample 60 survivors, then prune; the leaf listing further down the
# notebook has 60 entries.
tree.sample_survivors(n=60)
tree.prune()

tip count:  509


In [8]:
# Text rendering of the pruned tree; the captured output is an ASCII
# topology with one numeric label per leaf.
print(tree)


                     /-1573
                  /-|
               /-|   \-1671
              |  |
            /-|   \-1269
           |  |
           |  |   /-1817
           |   \-|
           |     |   /-1331
         /-|      \-|
        |  |         \-1861
        |  |
        |  |      /-1571
        |  |   /-|
        |  |  |  |   /-1752
        |   \-|   \-|
        |     |      \-1382
        |     |
        |      \-1113
        |
        |         /-913
        |      /-|
        |     |   \-1822
        |     |
        |     |      /-1401
        |     |     |
        |     |     |         /-1785
      /-|     |   /-|      /-|
     |  |   /-|  |  |     |  |   /-933
     |  |  |  |  |  |   /-|   \-|
     |  |  |  |  |  |  |  |      \-1205
     |  |  |  |  |   \-|  |
     |  |  |  |  |     |   \-1721
     |  |  |  |  |     |
     |  |  |  |  |      \-1289
     |  |  |   \-|
     |  |  |     |         /-1836
     |  |  |     |      /-|
     |  |  |     |     |  |   /-1790
     

In [9]:
" we are going for about 10 mutations at the deepest node "

' we are going for about 10 mutations at the deepest node '

In [10]:
# Annotate every node with the cumulative number of mutations on the path
# from the root. The root starts the sum at zero.
tree.total_mutations = 0

# Preorder guarantees a parent is annotated before any of its children.
for descendant in tree.iter_descendants(strategy="preorder"):
    parent_total = descendant.up.total_mutations
    descendant.total_mutations = descendant.n_mutations + parent_total

# Cell output: cumulative mutation count of each leaf.
[tip.total_mutations for tip in tree.iter_leaves()]

[8,
 8,
 13,
 7,
 9,
 9,
 12,
 13,
 12,
 11,
 11,
 10,
 7,
 12,
 11,
 11,
 11,
 8,
 13,
 10,
 9,
 12,
 12,
 9,
 9,
 3,
 11,
 8,
 6,
 6,
 11,
 11,
 11,
 7,
 10,
 10,
 8,
 13,
 9,
 10,
 7,
 11,
 13,
 10,
 12,
 9,
 9,
 7,
 10,
 10,
 10,
 8,
 9,
 8,
 11,
 9,
 13,
 9,
 13,
 11]

In [11]:
# Save the tree serialisation returned by tree.write(), newline-terminated.
newick_text = tree.write() + "\n"
with open("tree.nwk", "w") as newick_file:
    newick_file.write(newick_text)

In [12]:
# Write the leaf sequences of the pruned tree to seqs.fasta.
# NOTE(review): `naive=replay_naive` presumably adds the root sequence as
# an extra record — confirm against utils.write_leaf_sequences_to_fasta.
utils.write_leaf_sequences_to_fasta(tree, "seqs.fasta", naive=replay_naive)