### imports

In [1]:
import numpy as np
import strange
import toytree
import ipyparallel as ipp
import pandas as pd
from numba import jit
from collections import Counter
import numba
import h5py

### make tree

In [2]:
# Draw a random 8-tip coalescent tree; the fixed seed keeps it reproducible.
rtr = toytree.rtree().coaltree(ntips=8, seed=42)

# Rescaled copy passed to the simulation below (root height argument = 3).
rtr3 = rtr.mod.node_scale_root_height(3)

# Trailing semicolon suppresses the repr of the returned drawing objects.
rtr3.draw();

### simulate!

In [3]:
# Population parameters; theta below is derived from both of them.
Ne = 50000
mut = 1e-8

# Keyword arguments forwarded unchanged to strange.Coalseq.
kwargs = dict(
    workdir="../tests",
    mutation_rate=mut,
    recombination_rate=1e-9,
    theta=Ne * mut * 4,
    length=int(1e6),
    get_sequences=True,
    random_seed=42,
)

# Simulation object built on the rescaled tree from the previous cell.
coal8 = strange.Coalseq(tree=rtr3, name="coal8", **kwargs)

In [4]:
# Preview the first ten rows of the simulation's tree table; the recorded
# output shows columns start, end, length, nsnps, treeheight and mstree.
coal8.tree_table.head(10)

Unnamed: 0,end,length,mstree,nsnps,start,treeheight
0,54,54,"((8:204293.32434399396880,(6:64174.00872287491...",4,0,473009
1,568,514,"((8:204293.32434399396880,(6:64174.00872287491...",6,54,473009
2,675,107,"((8:204293.32434399396880,(6:64174.00872287491...",2,568,473009
3,2454,1779,"((8:204293.32434399396880,(6:64174.00872287491...",31,675,473009
4,2515,61,"((8:204293.32434399396880,(6:64174.00872287491...",1,2454,473009
5,2655,140,"((8:204293.32434399396880,(6:64174.00872287491...",1,2515,546145
6,2960,305,"((8:204293.32434399396880,(6:64174.00872287491...",5,2655,546145
7,3420,460,"((8:204293.32434399396880,(6:64174.00872287491...",8,2960,546145
8,3690,270,"(((4:104748.95632937064511,5:104748.9563293706...",8,3420,546145
9,4288,598,"(((4:104748.95632937064511,5:104748.9563293706...",8,3690,546145


### Let's run MrBayes on each gene tree

In [5]:
# Create the ipyparallel client that is handed to SlidingWindow below.
# NOTE(review): presumably requires an ipcluster to already be running — confirm
# and mention the start-up command in the notebook text.
ipyclient = ipp.Client()
ipyclient

<ipyparallel.client.client.Client at 0x181ae7e950>

In [6]:
# Sliding-window analysis object; uses the same name as the Coalseq run above
# and receives the ipyparallel client for its jobs.
sliding_obj = strange.SlidingWindow(
    name="coal8",
    workdir="../tests/",
    ipyclient=ipyclient,
)

In [7]:
# Long-running step: the recorded progress bar reports ~15 minutes
# ("inferring mb trees on mstrees") followed by a consolidation phase.
sliding_obj.run_mb_mstrees()

[####################] 100% 0:14:43 | inferring mb trees on mstrees 
consolidating...
done.


### So now we've run MrBayes on each gene tree, built a key that indexes every observed topology, and produced a table of how often each topology was visited during each gene tree's MCMC run.

We might want to also compute the probability, for each visited topology, of that topology being produced under the species tree. We can do that!

In [8]:
# Long-running step: the recorded progress bar reports ~4 minutes
# ("computing gene tree probabilities").
sliding_obj.add_probs_topokey()

[####################] 100% 0:03:51 | computing gene tree probabilities 

### Now let's sample!

Make an MCMC object:

In [9]:
# MCMC driver; uses the same name and workdir as the objects created above.
mcmc_obj = strange.MBmcmc(
    name="coal8",
    workdir="../tests/",
)

In [10]:
# Run the sampler; the recorded progress bar reports ~9 minutes for this call.
mcmc_obj.run_mcmc(
    numtimes=10000,   # total number of iterations
    batchsize=5,      # samples accumulated before each write to file
    mixnum=5,         # rows replaced per column in each iteration
    p=0.1,            # probability of keeping a move that lowers the score
    sd_normal=2,      # standard deviation of the normal decay function
    sample_freq=100,  # sampling frequency
)

[####################] 100% 0:08:42 | running mcmc 

### load up results file:

In [11]:
# Open the results file explicitly read-only. With no mode argument, older h5py
# releases fall back to append mode, which can create or modify the file when
# all we want is to inspect it. The handle is left open on purpose because the
# following cells read from it; call results.close() when finished.
results = h5py.File('../tests/coal8_mcmc_res.hdf5', 'r')

### look at resulting dimensions:

In [12]:
# Display the dataset handle only; its repr reports shape and dtype
# (recorded output: shape (100, 4000, 1116), int32).
results['mcmcarr']

<HDF5 dataset "mcmcarr": shape (100, 4000, 1116), type "<i4">

### look at first sample:

In [13]:
# Index the HDF5 dataset directly so only the first sample is read from disk.
# Wrapping the dataset in np.array(...) first would load the entire
# (100, 4000, 1116) int32 array into memory just to show one slice.
results['mcmcarr'][0]

array([[9062, 1969, 1008, ..., 6972,  555,  155],
       [9062, 1969, 1008, ..., 6972,  555,  155],
       [9062, 1969, 1008, ..., 6972,  555,  155],
       ...,
       [ 264,  590, 6206, ..., 2559,  155, 3238],
       [ 264,  590, 4165, ..., 2559, 8985, 3238],
       [ 264,  590, 1969, ..., 2559, 8985, 3238]], dtype=int32)

### look at last sample:

In [14]:
# Read only the final sample from disk instead of loading the whole dataset.
# A negative index selects the last entry regardless of how many samples
# were written, so this stays correct if numtimes or sample_freq change.
results['mcmcarr'][-1]

array([[4495, 5944, 5944, ..., 3558, 3558, 9905],
       [5406, 5406, 5406, ..., 3558, 9905, 3558],
       [4495, 5406, 5406, ..., 9905, 9905, 3558],
       ...,
       [7428, 5406, 4165, ..., 3558, 9905, 9905],
       [5944, 7428, 4165, ..., 9905, 3558, 9905],
       [4495, 4165, 5944, ..., 4384, 9905, 9905]], dtype=int32)