In [1]:
from ete3 import Tree
import os
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd

from physhapes.simulate import simulate_shapes
from physhapes.helper_functions import get_flat_values_sim


In [2]:
# Root shape for the simulation, read from a comma-separated file.
root = np.genfromtxt(fname='../data/hercules_forewing_n=20.csv', delimiter=',')
# Phylogeny: parse the tree file and print its topology as ASCII art.
tree = Tree('../data/chazot_subtree_rounded.nw')
print(tree)


      /-niepelti
   /-|
  |   \-theseus
--|
  |   /-hercules
   \-|
     |   /-amphitryon
      \-|
         \-telemachus


In [3]:
# Walk the tree in level order and record, by traversal position, which nodes
# are leaves and which are internal. The simulation cell uses `leafidx` to
# select rows of the flattened simulation output and `species` to label them.
# NOTE(review): this assumes get_flat_values_sim returns its rows in the same
# level-order as this traversal -- confirm against physhapes.
leafidx = []    # traversal positions of the leaf nodes
inneridx = []   # traversal positions of the internal nodes
species = []    # leaf names, in the same order as leafidx
# enumerate supplies the position, so no separate counter has to be kept in sync.
for position, node in enumerate(tree.traverse('levelorder')):
    if node.is_leaf():
        print(node.name)
        leafidx.append(position)
        species.append(node.name)
    else:
        inneridx.append(position)
print(leafidx)
print(inneridx)
print(species)

niepelti
theseus
hercules
amphitryon
telemachus
[3, 4, 5, 7, 8]
[0, 1, 2, 6]
['niepelti', 'theseus', 'hercules', 'amphitryon', 'telemachus']


In [4]:
# Settings shared by every simulated dataset. sigma, alpha and dt also appear
# in the name of the output directory.
rb = 0            # forwarded unchanged as simulate_shapes(rb=...)
dt = 0.05         # forwarded as simulate_shapes(dt=...); presumably the time step -- TODO confirm
alpha = 0.025     # forwarded as simulate_shapes(alpha=...)
sigma = 0.6       # forwarded as simulate_shapes(sigma=...)
n_datasets = 20   # number of independent datasets to simulate

In [5]:
# Simulate `n_datasets` independent datasets on the phylogeny. Every dataset
# draws its own seed and writes its results to
#   sigma=<sigma>_alpha=<alpha>_dt=<dt>/seed=<seed>/
#     flat_true_tree.csv    all rows returned by get_flat_values_sim
#     leaves.csv            the rows of that array selected by leafidx
#     simulated_leaves.pdf  line plot of the leaf rows
# Both CSV files are written without header and without index column.
for i in range(n_datasets):
    # Explicit int64 bounds keep the 2**32 upper limit valid on platforms whose
    # default NumPy integer is 32-bit; int() passes a plain Python int onwards.
    ds = int(np.random.randint(0, 2**32, dtype=np.int64))
    outputpath = f'sigma={sigma}_alpha={alpha}_dt={dt}/seed={ds}'
    os.makedirs(outputpath, exist_ok=True)
    print(f'Simulating dataset {i+1} out of {n_datasets} with seed {ds} ')

    simtree = simulate_shapes(ds=ds, dt=dt, sigma=sigma, alpha=alpha, root=root, tree=tree, rb=rb)
    flat_true_tree = np.array(get_flat_values_sim(simtree))

    # One line per leaf: even-indexed columns against odd-indexed columns
    # (presumably interleaved x/y coordinates -- TODO confirm).
    fig, ax = plt.subplots()
    for idx in leafidx:
        ax.plot(flat_true_tree[idx][0::2], flat_true_tree[idx][1::2], '--o', markersize=3, label='Simulated data')
    fig.savefig(f'{outputpath}/simulated_leaves.pdf', bbox_inches='tight')
    # Close this figure explicitly so figures do not accumulate across iterations.
    plt.close(fig)

    # Leaf rows labelled by species with columns X0, Y0, X1, Y1, ...; the labels
    # stay on the in-memory frame only, since the CSV omits header and index.
    leaves = pd.DataFrame(flat_true_tree[leafidx, :], index=species)
    leaves.columns = [f'{axis}{k}'
                      for k in range(leaves.shape[1] // 2) for axis in ('X', 'Y')]
    leaves.to_csv(f'{outputpath}/leaves.csv', index=False, header=False)

    # The full array is written exactly once, through pandas.
    flat_true_tree_df = pd.DataFrame(flat_true_tree)
    flat_true_tree_df.to_csv(f'{outputpath}/flat_true_tree.csv', index=False, header=False)

Simulating dataset 1 out of 20 with seed 3913316537 
Simulating dataset 2 out of 20 with seed 1855262077 
Simulating dataset 3 out of 20 with seed 3993032834 
Simulating dataset 4 out of 20 with seed 741734547 
Simulating dataset 5 out of 20 with seed 2272736646 
Simulating dataset 6 out of 20 with seed 1757368330 
Simulating dataset 7 out of 20 with seed 3656166487 
Simulating dataset 8 out of 20 with seed 2299487251 
Simulating dataset 9 out of 20 with seed 2244973671 
Simulating dataset 10 out of 20 with seed 3102274831 
Simulating dataset 11 out of 20 with seed 864561893 
Simulating dataset 12 out of 20 with seed 3972273939 
Simulating dataset 13 out of 20 with seed 100793516 
Simulating dataset 14 out of 20 with seed 1035959670 
Simulating dataset 15 out of 20 with seed 2102194496 
Simulating dataset 16 out of 20 with seed 1020511817 
Simulating dataset 17 out of 20 with seed 3101549169 


KeyboardInterrupt: 