### NB-2: Infer empirical trees along chromosomes

Slide a window along chromosomes with known genealogies, infer a RAxML tree in each window, and explore BUCKy-like methods for inferring trees more accurately and with a measure of uncertainty.

In [20]:
# pip install strange
# conda install seq-gen -c bioconda
# conda install toytree -c eaton-lab
# (strange includes a local raxml-ng)

In [21]:
import toytree
import strange
import numpy as np
import ipyparallel as ipp

In [22]:
def _version_tuple(version):
    """Return the leading numeric components of a dotted version string.

    Parsing stops at the first component that does not start with a digit,
    and any non-digit suffix on a component is ignored, so "0.1.21" gives
    (0, 1, 21) and "0.2.1-dev" gives (0, 2, 1).
    """
    numbers = []
    for piece in version.split("."):
        digits = ""
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
    return tuple(numbers)


# Compare the whole version, not only its last component: checking just the
# final number would reject e.g. 0.2.0 and crash on suffixes such as "-dev".
assert _version_tuple(toytree.__version__) >= (0, 1, 19), "update toytree"
print("toytree", toytree.__version__)
print("strange", strange.__version__)

toytree 0.1.21
strange 0.2.1-dev


### Connect to parallel client

In [23]:
# Create the ipyparallel client; it is handed to strange.SlidingWindow in the
# cells below through the `ipyclient` argument.
# NOTE(review): presumably needs an ipyparallel cluster already running — confirm.
ipyclient = ipp.Client()

### RAxML sliding inference

In [24]:
# Set up a sliding-window run named 'imb5' under ../tests/ that uses the
# parallel client, then infer RAxML trees (both window arguments are 500).
rslide = strange.SlidingWindow(
    name='imb5',
    workdir="../tests/",
    ipyclient=ipyclient,
)
rslide.run_raxml_sliding_windows(500, 500)

# preview the first rows of the resulting table
rslide.raxml_table.head()

[####################] 100% 0:00:04 | inferring raxml trees 


Unnamed: 0,start,stop,nsnps,tree
0,0,500,72,"(8:0.016051,(6:0.02253,7:0.024573)100:0.002164..."
1,500,1000,78,"((3:0.001993,4:0.010091)100:0.006036,(7:0.0301..."
2,1000,1500,79,"(7:0.012121,(8:0.026509,6:0.039164)100:0.00422..."
3,1500,2000,57,"(7:0.007263,(8:0.023772,6:0.02333)100:0.003889..."
4,2000,2500,77,"(7:0.012098,(8:0.041108,6:0.016174)100:1e-06,(..."


### MrBayes sliding inference

In [25]:
# Same 'imb5' run and working directory as the RAxML cell, but the trees in
# each window are inferred with MrBayes (both window arguments are 500).
mslide = strange.SlidingWindow(
    name='imb5',
    workdir="../tests/",
    ipyclient=ipyclient,
)
mslide.run_mb_sliding_windows(500, 500)

# preview the first rows of the resulting table
mslide.mb_table.head()

[####################] 100% 0:09:06 | inferring mb trees 


Unnamed: 0,start,stop,nsnps,tree
0,0,500,72,"(1:0.00121765,2:0.00796636,(5:0.0140468,((3:0...."
1,500,1000,78,"(1:0.0165178,2:0.00789329,(5:0.0195536,((3:0.0..."
2,1000,1500,79,"(1:0.00808141,2:0.0135173,(5:0.017082,((3:0.01..."
3,1500,2000,57,"(1:0.00472643,5:0.013629,(2:0.00980401,((3:0.0..."
4,2000,2500,77,"(1:0.0129995,(4:0.0138949,(2:0.0100658,3:0.002..."


### View inferred trees in each window

In [26]:
# Collect the RAxML trees of windows 5-14 into a multitree, root every tree
# on tip "8", and draw them on a grid (x=2, y=5).
rtrees = toytree.mtree(
    [rslide.raxml_table.tree[window] for window in range(5, 15)]
)
rtrees.treelist = [tre.root("8") for tre in rtrees.treelist]
rtrees.draw_tree_grid(x=2, y=5, tip_labels_align=True);

In [27]:
# Collect the MrBayes trees of windows 5-14 into a multitree, root every tree
# on tip "8", and draw them on a grid (x=2, y=5).
mtrees = toytree.mtree(
    [mslide.mb_table.tree[window] for window in range(5, 15)]
)
mtrees.treelist = [tre.root("8") for tre in mtrees.treelist]
mtrees.draw_tree_grid(x=2, y=5, tip_labels_align=True);

### Robinson-Foulds distances

In [28]:
# Robinson-Foulds distance (first element of the returned result) between each
# RAxML window tree in `rtrees` and the true tree (rslide.tree).
# NOTE(review): the window trees were re-rooted on tip "8" in an earlier cell;
# confirm whether the comparison should treat the trees as unrooted.
rfs = [
    tre.treenode.robinson_foulds(rslide.tree.treenode)[0]
    for tre in rtrees.treelist
]
rfs

[4, 6, 6, 8, 8, 10, 8, 6, 4, 4]