In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from collections import defaultdict
from ete3 import Tree

In [2]:
from models import SBN
from utils import generate

In [3]:
# Eight single-letter taxon labels, A through H.
taxa = [letter for letter in 'ABCDEFGH']
# presumably every tree topology over these taxa -- generate() lives in utils; confirm there
all_tree = generate(taxa)

In [4]:
# beta scales the Dirichlet concentration vector used to draw samp_freq;
# K_v is the number of trees drawn into samp_trees.
beta, K_v = 1e-3, 500

In [5]:
# Size of the tree space: the number of trees returned by generate(taxa).
tree_space_cap = len(all_tree)

In [6]:
# Seed NumPy's global RNG so this Dirichlet draw (and the later np.random.choice
# sampling, which uses the same global RNG) is reproducible under Restart & Run All.
np.random.seed(0)
# One probability vector over the whole tree space, drawn from a symmetric
# Dirichlet whose concentration parameters all equal beta.
samp_freq = np.random.dirichlet(beta*np.ones(tree_space_cap))

In [7]:
# Inspect the sampled probability vector (one entry per tree in all_tree).
samp_freq

array([8.42884149e-65, 0.00000000e+00, 2.13946050e-04, ...,
       1.63525196e-72, 0.00000000e+00, 0.00000000e+00])

In [8]:
# Map each tree to its sampled frequency, keeping only frequencies above 1e-20.
emp_tree_freq = {
    tree: freq
    for tree, freq in zip(all_tree, samp_freq)
    if freq > 1e-20
}

In [9]:
# Inspect the trees that passed the 1e-20 threshold together with their frequencies.
emp_tree_freq

{Tree node '' (0x7878a6ba2c7): np.float64(0.00021394604975263117),
 Tree node '' (0x7878a6b01cd): np.float64(0.0010685424114888582),
 Tree node '' (0x7878a6b2a91): np.float64(1.4279144260244653e-07),
 Tree node '' (0x7878a6b20d6): np.float64(3.6291588897519335e-16),
 Tree node '' (0x7878a6b3240): np.float64(5.15178853950829e-06),
 Tree node '' (0x7878a6ac9d6): np.float64(2.1775590193776014e-12),
 Tree node '' (0x7878a6ae88e): np.float64(1.0477562457546182e-15),
 Tree node '' (0x7878a6afb8b): np.float64(9.417932946880767e-07),
 Tree node '' (0x7878a6a8776): np.float64(1.3138238246144348e-17),
 Tree node '' (0x7878a6a50af): np.float64(1.3557413795084876e-14),
 Tree node '' (0x7878a6a442e): np.float64(9.999968780067532e-10),
 Tree node '' (0x7878a41c285): np.float64(4.258189443957087e-18),
 Tree node '' (0x7878a41e1b5): np.float64(6.47130952903739e-07),
 Tree node '' (0x7878a4180c1): np.float64(0.002113471533719349),
 Tree node '' (0x7878a41f5ac): np.float64(2.391962726189263e-15),
 Tree 

In [10]:
# Draw K_v trees from all_tree, choosing tree i with probability samp_freq[i].
# np.random.choice samples with replacement by default, so trees can repeat.
samp_trees = np.random.choice(all_tree, size=K_v, p=samp_freq)

In [11]:
# Inspect the array of sampled tree objects.
samp_trees

array([Tree node '' (0x78785df9ea3), Tree node '' (0x7878820ace5),
       Tree node '' (0x78786877bc7), Tree node '' (0x7878465fdd6),
       Tree node '' (0x78785df9ea3), Tree node '' (0x787874f0679),
       Tree node '' (0x78785df9ea3), Tree node '' (0x78786da5eb5),
       Tree node '' (0x78784649cd6), Tree node '' (0x78785df9ea3),
       Tree node '' (0x787867ce8e8), Tree node '' (0x7878465fdd6),
       Tree node '' (0x7878465fdd6), Tree node '' (0x7878820ace5),
       Tree node '' (0x78789bf96eb), Tree node '' (0x78785df9ea3),
       Tree node '' (0x7878465fdd6), Tree node '' (0x78786da5eb5),
       Tree node '' (0x7878465fdd6), Tree node '' (0x78787ad3ebe),
       Tree node '' (0x787875935d6), Tree node '' (0x7878465fdd6),
       Tree node '' (0x78785de5d07), Tree node '' (0x787845df3f1),
       Tree node '' (0x787845df3f1), Tree node '' (0x7878465fdd6),
       Tree node '' (0x78785df9ea3), Tree node '' (0x787867ce8e8),
       Tree node '' (0x78786089b58), Tree node '' (0x78785df9e

In [12]:
# Topology identifier of the first sampled tree (displayed below as a hex string).
samp_trees[0].get_topology_id()

'd793ccea88aa91fcbd76ab52f00dd9b7'

In [13]:
# Tally how many sampled trees share each topology id.
sample_tree_count_dict = defaultdict(int)
for tree in samp_trees:
    topology_id = tree.get_topology_id()
    sample_tree_count_dict[topology_id] = sample_tree_count_dict[topology_id] + 1

In [14]:
# Inspect the per-topology sample counts.
sample_tree_count_dict

defaultdict(int,
            {'d793ccea88aa91fcbd76ab52f00dd9b7': 67,
             '2c1dcbd9f58b72726d8d314aa3247eb8': 12,
             'c225e424a2cdfee6150944bab71a01fb': 1,
             'defeedbb14f16409162dc32ae8d92e32': 119,
             '73b4f359c9255a4bc21a7932b4c01959': 13,
             '6349e0b78ea8c46eb65f0f2c7b17cad7': 92,
             '405879943512aae474f9f7c2eba82bca': 21,
             '4ace19aee7c20bed3d59de4ba5bb0f71': 38,
             'edd9928d6b73e152571018b07ec28acf': 5,
             '60ae074efb3bd7571a5d36a72d744d05': 3,
             'd24e14ab6a3c9b0d7da94ad28c7cb91e': 2,
             '8ef3be8721190fc412f4272cb51fc2e5': 2,
             '87cae1b3dc920980f96fd7900b5d17fa': 11,
             '88cd04dd5359c310461bea72d32bebd6': 14,
             '5e700d6a01f6e084b9ffa62c7ccd11c6': 5,
             'ad7335038be964eb02ac4e7842130cfc': 7,
             '07d9eb88bbc0ccc18efccf49f5cfc053': 23,
             '20876bb7d2f853277a43942d031b6f6b': 3,
             '19753a590b816b1080e262

In [15]:
# One representative tree per distinct topology among the samples.
# get_topology_id must be *called*: without the parentheses the keys were
# bound-method objects rather than topology-id strings, so the dict neither
# de-duplicated by topology nor shared keys with sample_tree_count_dict.
# When several sampled trees share a topology, the last one seen is kept.
unique_samp_trees_dict = {tree.get_topology_id(): tree for tree in samp_trees}

In [16]:
# Sanity check: the per-topology counts should add up to the number of samples, K_v.
sum(sample_tree_count_dict.values())

500

In [18]:
# Build a random tree with n_taxa leaves via ete3's Tree.populate.
# n_taxa must match the number of labels in leaf_names, which are assigned to the leaves later.
n_taxa = 10
random_tree = Tree()
random_tree.populate(n_taxa)

In [19]:
# Ten single-letter leaf labels, A through J.
leaf_names = [letter for letter in "ABCDEFGHIJ"]

In [20]:
# Inspect the list of leaf labels.
leaf_names

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

In [22]:
# Relabel the leaves, in the order iter_leaves() yields them, with the entries of leaf_names.
for position, leaf_node in enumerate(random_tree.iter_leaves()):
    leaf_node.name = leaf_names[position]

In [23]:
# Show the relabelled tree as ASCII art (still rooted: two children at the root).
print(random_tree)


         /-A
      /-|
     |  |   /-B
     |   \-|
     |     |   /-C
   /-|      \-|
  |  |         \-D
  |  |
  |  |      /-E
  |  |   /-|
--|   \-|   \-F
  |     |
  |      \-G
  |
  |   /-H
   \-|
     |   /-I
      \-|
         \-J


In [24]:
# Print each subtree hanging directly off the root, one after another.
for subtree in random_tree.children:
    print(subtree)


      /-A
   /-|
  |  |   /-B
  |   \-|
  |     |   /-C
--|      \-|
  |         \-D
  |
  |      /-E
  |   /-|
   \-|   \-F
     |
      \-G

   /-H
--|
  |   /-I
   \-|
      \-J


In [25]:
# ete3's unroot() modifies the tree in place and returns None, so binding its
# return value left random_tree_unrooted as None. Unroot first, then alias the
# (now unrooted) tree. random_tree is still mutated in place, which the later
# cells rely on when they index random_tree.children[2].
random_tree.unroot()
random_tree_unrooted = random_tree

In [32]:
# NOTE(review): the recorded output below is `None`, which printing a Tree would not
# produce; it looks stale (presumably from printing unroot()'s return value) -- re-run to refresh.
print(random_tree)


None


In [33]:
# First child of the root after unrooting.
print(random_tree.children[0])


   /-H
--|
  |   /-I
   \-|
      \-J


In [None]:
# Second child of the root after unrooting.
print(random_tree.children[1])


   /-A
--|
  |   /-B
   \-|
     |   /-C
      \-|
         \-D


In [35]:
# Third child of the root; it exists only because unrooting turned the root into a trifurcation.
print(random_tree.children[2])


      /-E
   /-|
--|   \-F
  |
   \-G


In [37]:
# First sister of the root's first child (the output matches random_tree.children[1]).
print(random_tree.children[0].get_sisters()[0])


   /-A
--|
  |   /-B
   \-|
     |   /-C
      \-|
         \-D


In [38]:
# Second sister of the root's first child (the output matches random_tree.children[2]).
print(random_tree.children[0].get_sisters()[1])


      /-E
   /-|
--|   \-F
  |
   \-G


In [39]:
# Show the whole tree after unrooting: the root now has three children.
print(random_tree)


      /-H
   /-|
  |  |   /-I
  |   \-|
  |      \-J
  |
  |   /-A
  |--|
--|  |   /-B
  |   \-|
  |     |   /-C
  |      \-|
  |         \-D
  |
  |      /-E
  |   /-|
   \-|   \-F
     |
      \-G


In [41]:
# First child of the root's second child; per the output this is the leaf A.
print(random_tree.children[1].children[0])


--A


In [42]:
# get_sisters() returns a list; leaf A has exactly one sister node here.
print(random_tree.children[1].children[0].get_sisters())

[Tree node '' (0x787844d0e8b)]


In [43]:
# The single sister of leaf A: the subtree containing B, C and D.
print(random_tree.children[1].children[0].get_sisters()[0])


   /-B
--|
  |   /-C
   \-|
      \-D


In [44]:
# Scratch check: len() of a dict is its number of keys.
z = dict(a=1, b=2, c=3, d=4, e=5, f=6)
print(len(z))

6
