### NB-2: Infer empirical trees along chromosomes

Slide along chromosomes with known genealogies, infer RAxML trees in each window, and explore BUCKy-like methods for inferring trees more accurately and with a measure of uncertainty.

In [1]:
# Requirements (run these in a shell, not in this notebook):
# pip install strange
# conda install seq-gen -c bioconda
# conda install toytree -c eaton-lab
# Note: strange includes a local copy of raxml-ng.

In [2]:
import os
import numpy as np
import pandas as pd
import strange
import toytree
import toyplot
import ipyparallel as ipp

In [3]:
def _release_tuple(version):
    """Return the leading numeric fields of a dotted version string as ints.

    Parsing stops at the first field that does not start with a digit, and a
    non-digit suffix on a field is ignored, so "0.2.1-dev" gives (0, 2, 1).
    """
    release = []
    for field in str(version).split("."):
        digits = ""
        for char in field:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        release.append(int(digits))
    return tuple(release)


# Compare the whole release number rather than only its last field: checking
# just the final component would reject newer releases such as 0.2.0 and
# would raise ValueError on suffixed versions such as "0.1.19-dev".
assert _release_tuple(toytree.__version__) >= (0, 1, 19), "update toytree"
print("toytree", toytree.__version__)
print("strange", strange.__version__)

toytree 0.1.19
strange 0.2.1-dev


### Connect to parallel client

In [4]:
# Connect to an ipyparallel cluster. Client() is called with no arguments, so
# it relies on its default connection settings and a controller that is
# already running.
# NOTE(review): presumably started beforehand (e.g. `ipcluster start`) — confirm.
ipyclient = ipp.Client()

            Controller appears to be listening on localhost, but not on this machine.
            If this is true, you should specify Client(...,sshserver='you@oud')
            or instruct your controller to listen on an external IP.


### Load data sets from notebook 1

In [5]:
# Build the SlidingWindow object used by the cells below, handing it the
# parallel client created earlier.
# NOTE(review): "imb5" and "../tests/" look like the dataset name and working
# directory written by notebook 1 — confirm against strange.SlidingWindow.
# NOTE(review): `self` is an unusual name for a notebook-level variable; later
# cells reference it, so renaming it needs a coordinated change.
self = strange.SlidingWindow("imb5", "../tests/", ipyclient)


### Infer raxml trees over windows

In [6]:
# Infer a raxml tree for every sliding window. With window_size=1000 and
# slide_interval=500, consecutive windows overlap by half their length.
# NOTE(review): both values are presumably in base pairs — confirm.
self.run_raxml_sliding_windows(window_size=1000, slide_interval=500)

[####################] 100% 0:00:53 | inferring raxml trees 

### `raxml_table` shows the inferred tree for each window

In [7]:
# Preview the first rows of the per-window results table.
self.raxml_table.head()

Unnamed: 0,start,stop,nsnps,tree
0,0,1000,155,"(6:0.022693,(5:0.024129,(7:0.021552,8:0.036132..."
1,500,1500,148,"(6:0.02447,(5:0.020323,(7:0.019418,8:0.027696)..."
2,1000,2000,143,"((5:0.017207,6:0.024403)100:1e-06,(7:0.020452,..."
3,1500,2500,131,"(5:0.012121,6:0.01722,((8:0.030891,7:0.022536)..."
4,2000,3000,131,"(8:0.028493,(5:0.012141,6:0.014177)100:1e-06,(..."


### Functions to compare raxml trees to true tree in each interval

Each inferred tree should be rooted in the same way as the species tree before the two are compared.

In [24]:
# Draw a run of consecutive window trees side by side on a grid.
window_rows = range(5, 15)
newicks = [self.raxml_table.tree[row] for row in window_rows]
mtree = toytree.mtree(newicks)

# Re-root every tree on tip "8" so the panels share one orientation.
# NOTE(review): presumably "8" is the outgroup of the species tree — confirm.
rooted_trees = [tre.root("8") for tre in mtree.treelist]
mtree.treelist = rooted_trees

# The trailing semicolon suppresses the returned object's repr.
mtree.draw_tree_grid(x=2, y=5, tip_labels_align=True);

### Robinson-Foulds distances

In [18]:
# Compare each windowed tree with the reference tree held on the
# SlidingWindow object, keeping the first element of each comparison result.
# NOTE(review): element [0] is presumably the Robinson-Foulds distance itself
# — confirm against the treenode API.
# NOTE(review): the saved output lists 5 values although the grid cell builds
# 10 trees, and the execution counts (24, then 18) are out of order; restart
# the kernel and run all cells to refresh the output.
rfs = []
for tre in mtree.treelist:
    comparison = tre.treenode.robinson_foulds(self.tree.treenode)
    rfs.append(comparison[0])
rfs

[6, 4, 4, 4, 6]