# Astral tree on outgroup-reference-aligned reads

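This notebook builds a species-level phylogeny for 38 Amaranth taxa (one consensus sample per `imap` label, plus the beet outgroup). <br>
Astral works from individual gene trees - here the "genes" are fixed-length genomic windows extracted with ipyrad's treeslider - and estimates the phylogeny in a two-step process. <br>
The data are 3RAD reads assembled with ipyrad, with modest filtering (see below). <br>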

To make sure there is no problem using an ingroup reference genome that we are aligning all reads to, I am also testing beet and quinoa as the reference.  In both cases, beet is the outgroup, and quinoa is not included in the tree at all. <br>

last run on Nov 12, 2020

In [1]:
# import packages into python
import ipyrad as ip
import ipyrad.analysis as ipa
import toytree
import toyplot.svg
import pandas as pd

# connect to parallel client
#import ipyparallel as ipp
#ipyclient = ipp.Client()
#ip.cluster_info(ipyclient)

# Record the package versions used for this run (one "name version" line each).
print("\n".join(
    "{} {}".format(label, module.__version__)
    for label, module in (("ipyrad", ipa), ("toytree", toytree))
))

ipyrad 0.9.61
toytree 2.0.4


In [2]:
# imap maps each population label to the list of sample names assigned to it.
# It is passed to every treeslider call below together with
# consensus_reduce=True, so each key becomes one tip in the resulting trees.
#   * Sample names that are commented out are not assigned to any group.
#   * hybridus samples are split over three labels (hybridus1/2/3).
#   * asplundii_SLH_AL_0019 is listed under "graecizans"; the stand-alone
#     "asplundii" entry is commented out.
#   * "beet" holds the single outgroup sample used to root the trees.
imap={"acanthochiton": ["acanthochiton_SLH_AL_0001","acanthochiton_SLH_AL_0002"],
      "acutilobus": ["acutilobus_SLH_AL_0003","acutilobus_SLH_AL_0004"],
      "albus": ["albus_SLH_AL_0006", "albus_SLH_AL_0009", "albus_SLH_AL_0010"],
      "arenicola": ["arenicola_SLH_AL_0013","arenicola_SLH_AL_0018","arenicola_SLH_AL_0012","arenicola_SLH_AL_0015"],
 #     "asplundii": [],
      "australis": ["australis_SLH_AL_0020","australis_SLH_AL_0021"],
      "blitoides": ["blitoides_SLH_AL_0028"], #"blitoides_SLH_AL_0023",
      "blitum": ["blitum-blitum_SLH_AL_0029", "blitum-oleraceus_SLH_AL_0034", "blitum-pseudogracilis_SLH_AL_0037"],
      "californicus": ["californicus_SLH_AL_0039"],
      "cannabinus": ["cannabinus_SLH_AL_0040", "cannabinus_SLH_AL_0041"], #"cannabinus_SLH_AL_0042"],
      "caudatus": ["caudatus_SLH_AL_0102","caudatus_SLH_AL_0110","caudatus_SLH_AL_0116","caudatus_SLH_AL_0322","caudatus_SLH_AL_0540"],
      "crassipes": ["crassipes_SLH_AL_0599","crassipes_SLH_AL_0600"],
      "cruentus": ["cruentus_SLH_AL_0679", "cruentus_SLH_AL_0699", "cruentus_SLH_AL_0728", "cruentus_SLH_AL_0804", "cruentus_SLH_AL_0832"],
      "hybridus2": ["hybridus_SLH_AL_1060", "hybridus_SLH_AL_1098"],
      "deflexus": ["deflexus_SLH_AL_0951","deflexus_SLH_AL_0952", "deflexus_SLH_AL_0955","deflexus_SLH_AL_0953","deflexus_SLH_AL_0954"],
      "dubius": ["dubius_SLH_AL_0965","dubius_SLH_AL_0979","dubius_SLH_AL_0992"],
      "fimbriatus": ["fimbriatus_SLH_AL_0998"], #"fimbriatus_SLH_AL_0997",
      "floridanus": ["floridanus_SLH_AL_1000"],
      "graecizans": ["graecizans-aschersonianus_SLH_AL_1009", "graecizans-silvestris_SLH_AL_1013", "graecizans-thellungianus_SLH_AL_1014",
                    "asplundii_SLH_AL_0019"],
      "greggii": ["greggii_SLH_AL_1015", "greggii_SLH_AL_1016"],
      "hybridus1": ["hybridus_SLH_AL_0001-restricted", "hybridus_SLH_AL_1117"],
      "hybridus3": ["hybridus_SLH_AL_1099"],
      "hypochondriacus": ["hypochondriacus_SLH_AL_1178", "hypochondriacus_SLH_AL_1197", "hypochondriacus_SLH_AL_1264", "hypochondriacus_SLH_AL_1285","hypochondriacus_SLH_AL_2282", "hypochondriacus_SLH_AL_2436"],
      "muricatus": ["muricatus_SLH_AL_2634"],
      "palmeri": ["palmeri-aff_SLH_AL_0017", "palmeri_SLH_AL_2637", "palmeri_SLH_AL_2644", "palmeri_SLH_AL_2647", "palmeri_SLH_AL_2649", "palmeri_SLH_AL_2650",
                  "palmeri_SLH_AL_2635","palmeri_SLH_AL_2636","palmeri_SLH_AL_2638","palmeri_SLH_AL_2639","palmeri_SLH_AL_2640","palmeri_SLH_AL_2641","palmeri_SLH_AL_2642","palmeri_SLH_AL_2643","palmeri_SLH_AL_2645","palmeri_SLH_AL_2646",
                  "palmeri_SLH_AL_159-contemp","palmeri_SLH_AL_163-contemp","palmeri_SLH_AL_173-contemp","palmeri_SLH_AL_174-contemp","palmeri_SLH_AL_235-contemp"],
      "powellii": ["powellii-bouchonii_SLH_AL_2653", "powellii-powellii_SLH_AL_2663", "powellii-powellii_SLH_AL_2665"],
      "pumilus": ["pumilus_SLH_AL_5-restricted","pumilus_SLH_AL_7-restricted"],
      "quitensis": ["quitensis_SLH_AL_2671", "quitensis_SLH_AL_2675","quitensis_SLH_AL_2753"],
      "retroflexus": ["retroflexus_SLH_AL_2770", "retroflexus_SLH_AL_2773", "retroflexus_SLH_AL_2780"],
      "spinosus": ["spinosus_SLH_AL_2792", "spinosus_SLH_AL_2793", "spinosus_SLH_AL_2806", "spinosus_SLH_AL_2809", "spinosus_SLH_AL_2811"],
      "standleyanus": ["standleyanus_SLH_AL_2815","standleyanus_SLH_AL_2816"],
      "tamaulipensis": ["tamaulipensis_SLH_AL_2817"],
      "torreyi": ["torreyi_SLH_AL_2818"],
      "tricolor": ["tricolor_SLH_AL_2869", "tricolor_SLH_AL_2940", "tricolor_SLH_AL_2953", "tricolor_SLH_AL_2978"],
      "tuberculatus": ["tuberculatus_SLH_AL_0009-restricted", "tuberculatus_SLH_AL_3003", "tuberculatus_SLH_AL_3017", "tuberculatus_SLH_AL_3027", "tuberculatus_SLH_AL_3045",
                        "tuberculatus_SLH_AL_2999","tuberculatus_SLH_AL_3000","tuberculatus_SLH_AL_3001","tuberculatus_SLH_AL_3002","tuberculatus_SLH_AL_3004","tuberculatus_SLH_AL_3005",
                        "tuberculatus_SLH_AL_3006","tuberculatus_SLH_AL_3007","tuberculatus_SLH_AL_3008","tuberculatus_SLH_AL_3009","tuberculatus_SLH_AL_3010",
                        "tuberculatus_SLH_AL_3011","tuberculatus_SLH_AL_3012","tuberculatus_SLH_AL_3013","tuberculatus_SLH_AL_3014","tuberculatus_SLH_AL_3015",
                        "tuberculatus_SLH_AL_3016","tuberculatus_SLH_AL_3018","tuberculatus_SLH_AL_3019","tuberculatus_SLH_AL_3020","tuberculatus_SLH_AL_3021",
                        "tuberculatus_SLH_AL_3022","tuberculatus_SLH_AL_3023","tuberculatus_SLH_AL_3024","tuberculatus_SLH_AL_3025","tuberculatus_SLH_AL_3026",
                        "tuberculatus_SLH_AL_3028","tuberculatus_SLH_AL_3029","tuberculatus_SLH_AL_3030","tuberculatus_SLH_AL_3031","tuberculatus_SLH_AL_3032",
                        "tuberculatus_SLH_AL_3033","tuberculatus_SLH_AL_3034","tuberculatus_SLH_AL_3035","tuberculatus_SLH_AL_3036",#"tuberculatus_SLH_AL_3037",
                        "tuberculatus_SLH_AL_3038","tuberculatus_SLH_AL_3039","tuberculatus_SLH_AL_3041","tuberculatus_SLH_AL_3042","tuberculatus_SLH_AL_3043",
                        "tuberculatus_SLH_AL_3044","tuberculatus_SLH_AL_3046","tuberculatus_SLH_AL_154-contemp","tuberculatus_SLH_AL_155-contemp",
                        "tuberculatus_SLH_AL_156-contemp","tuberculatus_SLH_AL_157-contemp","tuberculatus_SLH_AL_160-contemp","tuberculatus_SLH_AL_165-contemp",
                        "tuberculatus_SLH_AL_169-contemp","tuberculatus_SLH_AL_175-contemp","tuberculatus_SLH_AL_176-contemp","tuberculatus_SLH_AL_202-contemp",
                        "tuberculatus_SLH_AL_206-contemp","tuberculatus_SLH_AL_208-contemp","tuberculatus_SLH_AL_236-contemp","tuberculatus_SLH_AL_237-contemp"],
        "tucsonensis": ["tucsonensis_SLH_AL_3068"],
        "viridis": ["viridis_SLH_AL_3047", "viridis_SLH_AL_3062"],
        "watsonii": ["watsonii_SLH_AL_3065"],
        "wrightii": ["wrightii_SLH_AL_3066", "wrightii_SLH_AL_3067"],    
      # outgroup sample used to root every tree in this notebook
      "beet":["beet"],
     }

In [3]:
# Tip-label colour for every population label, written as colour groups and
# flattened into a plain {label: colour} dict. The drawing cells look up
# color_dict[label] for each tip, so every tip label must appear here.
# ("asplundii" and "unknown" are intentionally not listed.)
color_dict = {
    label: colour
    for colour, labels in (
        ("orange", (
            "quitensis", "caudatus", "hypochondriacus", "reference",
            "hybridus1", "hybridus2", "hybridus3", "cruentus",
            "wrightii", "retroflexus", "powellii", "acutilobus",
            "watsonii", "palmeri", "spinosus", "dubius",
        )),
        ("blue", (
            "arenicola", "greggii", "acanthochiton", "pumilus",
            "floridanus", "tuberculatus", "cannabinus", "australis",
        )),
        ("red", ("viridis", "deflexus", "muricatus", "standleyanus")),
        ("purple", ("graecizans", "tricolor", "blitum")),
        ("green", (
            "albus", "californicus", "blitoides", "torreyi",
            "crassipes", "tamaulipensis", "fimbriatus", "tucsonensis",
        )),
        # the outgroup
        ("black", ("beet",)),
    )
    for label in labels
}

# Quinoa reference

In [4]:
# HDF5 sequence database from the ipyrad assembly aligned to the Quinoa reference
dataQ = (
    "/rigel/dsi/users/slh2181/tuberculatus_plate/ipyrad/"
    "Quinoa_ref_all_samples_min2_outfiles/"
    "Quinoa_ref_all_samples_min2.seqs.hdf5"
)
# directory that receives every treeslider / Astral / SVG output of this notebook
OUTDIR = (
    "/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/"
    "hybridus_redo/Outgroup_reference/"
)

In [5]:
# Show the first rows of the scaffold table for the Quinoa-aligned database so
# that scaffold indices (row numbers) can be matched to scaffold names/lengths.
ipa.treeslider(data=dataQ).scaffold_table.head(n=20)

Unnamed: 0,scaffold_name,scaffold_length
0,NW_018742204.1,23816425
1,NW_018742205.1,8241039
2,NW_018742206.1,786584
3,NW_018742207.1,33371
4,NW_018742208.1,38464
5,NW_018742209.1,160963
6,NW_018742210.1,41449
7,NW_018742211.1,1666661
8,NW_018742212.1,22965
9,NW_018742213.1,142830


### Astral does not use bootstrap

In [6]:
# Set up treeslider to infer one RAxML tree per non-overlapping 1 Mb window
# ("gene") of the Quinoa-aligned data. Nothing is run until ts.run() is called.
ts = ipa.treeslider(
    # --- input / output -------------------------------------------------
    name='Quinoa_1Mb',                # author's note: this name can't end in _2
    workdir=OUTDIR,
    data=dataQ,
    # --- population handling --------------------------------------------
    imap=imap,
    consensus_reduce=True,            # author's note: make 1 sequence for each key in imap
    minmap=dict.fromkeys(imap, 0.1),  # the same 0.1 threshold for every imap label
    # --- windows --------------------------------------------------------
    scaffold_idxs=range(3487),        # scaffold row indices 0..3486
    window_size=10**6,
    slide_size=10**6,                 # slide == window, so windows do not overlap
    # --- filters --------------------------------------------------------
    minsnps=100,                      # author's note: a window needs at least this much diversity
    # Author's example: mincov=0.5 requires 50% of samples to have a site that
    # is not N or - for the site to be kept. NOTE(review): an integer is passed
    # here, presumably a minimum sample count rather than a fraction - confirm.
    mincov=4,
    rmincov=0,
    # --- tree inference -------------------------------------------------
    inference_method="raxml",         # author's note: options are raxml and mrbayes
    inference_args={"N": 1, "T": 24}, # author's note: number of bootstraps and number of cores
)

In [7]:
# Sanity check, step 1: gather the sample names that treeslider found in the
# dataset, so the names typed into imap can be compared against them.
goodnames = {name for name in ts._pnames}
goodnames

{'SLH_AL_0032-contem',
 'acanthochiton_SLH_AL_0001',
 'acanthochiton_SLH_AL_0002',
 'acutilobus_SLH_AL_0003',
 'acutilobus_SLH_AL_0004',
 'albus_SLH_AL_0006',
 'albus_SLH_AL_0009',
 'albus_SLH_AL_0010',
 'amaranth-ref',
 'arenicola_SLH_AL_0012',
 'arenicola_SLH_AL_0013',
 'arenicola_SLH_AL_0015',
 'arenicola_SLH_AL_0018',
 'asplundii_SLH_AL_0019',
 'australis_SLH_AL_0020',
 'australis_SLH_AL_0021',
 'beet',
 'blitoides_SLH_AL_0023',
 'blitoides_SLH_AL_0028',
 'blitum-blitum_SLH_AL_0029',
 'blitum-oleraceus_SLH_AL_0034',
 'blitum-pseudogracilis_SLH_AL_0037',
 'californicus_SLH_AL_0039',
 'cannabinus_SLH_AL_0040',
 'cannabinus_SLH_AL_0041',
 'cannabinus_SLH_AL_0042',
 'caudatus_SLH_AL_0102',
 'caudatus_SLH_AL_0110',
 'caudatus_SLH_AL_0116',
 'caudatus_SLH_AL_0322',
 'caudatus_SLH_AL_0540',
 'crassipes_SLH_AL_0599',
 'crassipes_SLH_AL_0600',
 'cruentus_SLH_AL_0679',
 'cruentus_SLH_AL_0699',
 'cruentus_SLH_AL_0728',
 'cruentus_SLH_AL_0804',
 'cruentus_SLH_AL_0832',
 'deflexus_SLH_AL_0951',

In [8]:
# Sanity check, step 2: for every imap group print the sample names that are
# NOT present in the dataset. An empty set() means the group has no typos.
for vals in ts.imap.values():
    print(set(vals) - goodnames)

set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()


In [9]:
# Run treeslider / RAxML on the 1 Mb Quinoa windows.
# Author's notes: this is slow (hours); a leftover temp directory from an
# earlier run prevents it from starting; auto=True is meant for a session with
# 24 engines available.
ts.run(force=True, auto=True)

building database: nwindows=987; minsnps=100
[####################] 100% 2:53:48 | inferring trees 
tree_table written to /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Quinoa_1Mb.tree_table.csv


In [10]:
# Last 10 windows of the Quinoa 1 Mb tree table
ts.tree_table.iloc[-10:]

Unnamed: 0,scaffold,start,end,sites,snps,samples,missing,tree
977,3480,1000000,2000000,1175,60,39,0.21,
978,3480,2000000,3000000,1042,28,39,0.43,
979,3480,3000000,4000000,342,16,39,0.44,
980,3480,4000000,5000000,589,40,39,0.36,
981,3480,5000000,6000000,728,29,39,0.21,
982,3480,6000000,7000000,1464,52,39,0.43,
983,3480,7000000,8000000,555,36,39,0.56,
984,3480,8000000,9000000,228,9,39,0.54,
985,3480,9000000,10000000,904,51,39,0.49,
986,3480,10000000,11000000,181,3,39,0.6,


Check out the Treeslider results

In [11]:
# Summary statistics over all Quinoa 1 Mb windows (sites, snps, samples, missing)
ts.tree_table.describe()

Unnamed: 0,scaffold,start,end,sites,snps,samples,missing
count,987.0,987.0,987.0,987.0,987.0,987.0,987.0
mean,1630.228,2274000.0,3274000.0,424.198,16.943,30.979,0.377
std,1027.199,2870000.0,2870000.0,481.732,22.165,15.772,0.256
min,0.0,0.0,1000000.0,0.0,0.0,0.0,0.0
25%,747.0,0.0,1000000.0,57.0,0.0,39.0,0.18
50%,1654.0,1000000.0,2000000.0,251.0,8.0,39.0,0.41
75%,2489.0,3000000.0,4000000.0,637.0,25.5,39.0,0.545
max,3480.0,22000000.0,23000000.0,3034.0,157.0,39.0,0.9


In [12]:
# Summarise the Quinoa 1 Mb window trees into a single tree with Astral.
# The in-memory tree table from treeslider is used directly; no imap is passed
# and the optional bootstrap inputs (bootsfile / gene_resampling / nboots) are
# left unset.
Ast = ipa.astral(
    name='Astral_Quinoa_1MB',
    workdir=OUTDIR,
    data=ts.tree_table,
    imap=None,
    annotation=1,
)

# show the java command that Ast.run() will execute
Ast.print_command()

java -jar /rigel/home/slh2181/miniconda3/bin/astral.5.7.1.jar -i /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/tmptrees.txt -o /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Quinoa_1MB.tre -t 1


In [13]:
# Run Astral; the inferred tree is then read back from Ast.tree by the drawing cell.
Ast.run()

[astral.5.7.1.jar]
inferred tree written to (/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Quinoa_1MB.tre)


# Draw the Astral Tree

In [14]:
# Root the Quinoa 1 Mb Astral tree on the beet outgroup.
rooted1 = Ast.tree.root("beet")
our_labels = rooted1.get_tip_labels()

# Tip labels are reduced to the text before the first "_" and "-" and then
# looked up in color_dict to get one colour per tip.
sp_names = [label.partition("_")[0].partition("-")[0] for label in our_labels]
colors = list(map(color_dict.__getitem__, sp_names))

# Give the root a support of 100, then turn every node's support into an int
# so the node labels are drawn as whole numbers.
rooted1.treenode.support = 100
for node in rooted1.treenode.traverse():
    node.support = int(float(node.support))

canvas, axes, mark = rooted1.draw(
    width=600,
    height=800,
    use_edge_lengths=False,
    tip_labels_align=True,
    tip_labels_colors=colors,
    tip_labels_style={"font-size": "16px"},
    node_labels="support",
    node_labels_style={"font-size": "15px"},
    node_sizes=27,
    node_style={"fill": "white", "stroke": "black"},
);

In [15]:
# Save the Quinoa 1 Mb tree drawing as an SVG file.
toyplot.svg.render(
    canvas,
    "/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/"
    "Outgroup_reference/Astral_Quinoa_1MB.svg",
)

### 500kb windows

In [16]:
# Set up treeslider to infer one RAxML tree per non-overlapping 500 kb window
# ("gene") of the Quinoa-aligned data.
# NOTE(review): unlike the 1 Mb run, no inference_args are passed here, so
# treeslider's own defaults apply - confirm that this is intended.
ts2 = ipa.treeslider(
    # --- input / output -------------------------------------------------
    name='Quinoa_500kb',              # author's note: this name can't end in _2
    workdir=OUTDIR,
    data=dataQ,
    # --- population handling --------------------------------------------
    imap=imap,
    consensus_reduce=True,            # author's note: make 1 sequence for each key in imap
    minmap=dict.fromkeys(imap, 0.1),  # the same 0.1 threshold for every imap label
    # --- windows --------------------------------------------------------
    scaffold_idxs=range(3487),
    window_size=5 * 10**5,
    slide_size=5 * 10**5,             # slide == window, so windows do not overlap
    # --- filters --------------------------------------------------------
    minsnps=50,                       # author's note: a window needs at least this much diversity
    mincov=4,                         # NOTE(review): integer, presumably a sample count - confirm
    rmincov=0,
    # --- tree inference -------------------------------------------------
    inference_method="raxml",         # author's note: options are raxml and mrbayes
)

In [17]:
# Run treeslider / RAxML on the 500 kb Quinoa windows (overwrites earlier results).
ts2.run(auto=True, force=True)

building database: nwindows=2169; minsnps=50
[####################] 100% 0:54:22 | inferring trees 
tree_table written to /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Quinoa_500kb.tree_table.csv


In [18]:
# Summarise the Quinoa 500 kb window trees with Astral, using the in-memory
# tree table produced by ts2.
AstQ500 = ipa.astral(
    name='Astral_Quinoa_500kB',
    workdir=OUTDIR,
    data=ts2.tree_table,
    annotation=1,
)

# show the java command, then execute it
AstQ500.print_command()
AstQ500.run()

java -jar /rigel/home/slh2181/miniconda3/bin/astral.5.7.1.jar -i /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/tmptrees.txt -o /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Quinoa_500kB.tre -t 1
[astral.5.7.1.jar]
inferred tree written to (/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Quinoa_500kB.tre)


In [19]:
# Root the Quinoa 500 kb Astral tree on the beet outgroup.
rooted500 = toytree.tree(AstQ500.tree).root("beet")

# One colour per tip: keep the text before the first "_" and "-" of each tip
# label and look it up in color_dict.
our_labels = rooted500.get_tip_labels()
sp_names = [label.partition("_")[0].partition("-")[0] for label in our_labels]
colors = [color_dict[name] for name in sp_names]

# Root support is set to 100; all supports are then cast to int for display.
rooted500.treenode.support = 100
for node in rooted500.treenode.traverse():
    node.support = int(float(node.support))

canvas, axes, mark = rooted500.draw(
    width=600,
    height=800,
    use_edge_lengths=False,
    tip_labels_align=True,
    tip_labels_colors=colors,
    tip_labels_style={"font-size": "16px"},
    node_labels="support",
    node_labels_style={"font-size": "15px"},
    node_sizes=27,
    node_style={"fill": "white", "stroke": "black"},
);

In [20]:
# Save the Quinoa 500 kb tree drawing as an SVG file.
toyplot.svg.render(
    canvas,
    "/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/"
    "Outgroup_reference/Astral_Quinoa_500kB.svg",
)

### 250kb windows

In [21]:
# Set up treeslider to infer one RAxML tree per non-overlapping 250 kb window
# ("gene") of the Quinoa-aligned data.
# NOTE(review): unlike the 1 Mb run, no inference_args are passed here, so
# treeslider's own defaults apply - confirm that this is intended.
ts3 = ipa.treeslider(
    # --- input / output -------------------------------------------------
    name='Quinoa_250kb',              # author's note: this name can't end in _2
    workdir=OUTDIR,
    data=dataQ,
    # --- population handling --------------------------------------------
    imap=imap,
    consensus_reduce=True,            # author's note: make 1 sequence for each key in imap
    minmap=dict.fromkeys(imap, 0.1),  # the same 0.1 threshold for every imap label
    # --- windows --------------------------------------------------------
    scaffold_idxs=range(3487),
    window_size=25 * 10**4,
    slide_size=25 * 10**4,            # slide == window, so windows do not overlap
    # --- filters --------------------------------------------------------
    minsnps=25,                       # author's note: a window needs at least this much diversity
    mincov=4,                         # NOTE(review): integer, presumably a sample count - confirm
    rmincov=0,
    # --- tree inference -------------------------------------------------
    inference_method="raxml",         # author's note: options are raxml and mrbayes
)

In [22]:
# Run treeslider / RAxML on the 250 kb Quinoa windows (overwrites earlier results).
ts3.run(auto=True, force=True)

building database: nwindows=4569; minsnps=25
[####################] 100% 1:10:19 | inferring trees 
tree_table written to /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Quinoa_250kb.tree_table.csv


In [23]:
# Summarise the Quinoa 250 kb window trees with Astral, using the in-memory
# tree table produced by ts3.
Ast250 = ipa.astral(
    name='Astral_Quinoa_250kB',
    workdir=OUTDIR,
    data=ts3.tree_table,
    annotation=1,
)

# show the java command, then execute it
Ast250.print_command()
Ast250.run()

java -jar /rigel/home/slh2181/miniconda3/bin/astral.5.7.1.jar -i /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/tmptrees.txt -o /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Quinoa_250kB.tre -t 1
[astral.5.7.1.jar]
inferred tree written to (/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Quinoa_250kB.tre)


In [24]:
# Root the Quinoa 250 kb Astral tree on the beet outgroup.
rooted250 = toytree.tree(Ast250.tree).root("beet")

# One colour per tip: keep the text before the first "_" and "-" of each tip
# label and look it up in color_dict.
our_labels = rooted250.get_tip_labels()
sp_names = [label.partition("_")[0].partition("-")[0] for label in our_labels]
colors = [color_dict[name] for name in sp_names]

# Root support is set to 100; all supports are then cast to int for display.
rooted250.treenode.support = 100
for node in rooted250.treenode.traverse():
    node.support = int(float(node.support))

canvas, axes, mark = rooted250.draw(
    width=600,
    height=800,
    use_edge_lengths=False,
    tip_labels_align=True,
    tip_labels_colors=colors,
    tip_labels_style={"font-size": "16px"},
    node_labels="support",
    node_labels_style={"font-size": "15px"},
    node_sizes=27,
    node_style={"fill": "white", "stroke": "black"},
);

In [25]:
# Save the Quinoa 250 kb tree drawing as an SVG file.
# The file name previously read "Astral_Qunioa_250kB.svg"; it is now spelled
# like the other Quinoa outputs (Astral_Quinoa_1MB / Astral_Quinoa_500kB).
toyplot.svg.render(
    canvas,
    "/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/"
    "Outgroup_reference/Astral_Quinoa_250kB.svg",
)

# Beet reference

In [26]:
# HDF5 sequence database from the ipyrad assembly aligned to the Beet reference
dataB = (
    "/rigel/dsi/users/slh2181/tuberculatus_plate/ipyrad/"
    "Beet_ref_all_samples_outfiles/"
    "Beet_ref_all_samples.seqs.hdf5"
)
# output directory (same location as used for the Quinoa-reference runs)
OUTDIR = (
    "/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/"
    "hybridus_redo/Outgroup_reference/"
)

In [27]:
# Show the first rows of the scaffold table for the Beet-aligned database so
# that scaffold indices (row numbers) can be matched to scaffold names/lengths.
ipa.treeslider(data=dataB).scaffold_table.head(n=20)

Unnamed: 0,scaffold_name,scaffold_length
0,NC_025812.2,34941034
1,NW_017567325.1,1523057
2,NW_017567326.1,4817789
3,NW_017567327.1,508235
4,NW_017567328.1,725263
5,NW_017567329.1,1524158
6,NW_017567330.1,1094224
7,NW_017567331.1,410030
8,NW_017567332.1,194470
9,NW_017567333.1,170548


### Astral does not use bootstrap

In [28]:
# Set up treeslider to infer one RAxML tree per non-overlapping 1 Mb window
# ("gene") of the Beet-aligned data. Nothing is run until ts4.run() is called.
ts4 = ipa.treeslider(
    # --- input / output -------------------------------------------------
    name='Beet_1MB',                  # author's note: this name can't end in _2
    workdir=OUTDIR,
    data=dataB,
    # --- population handling --------------------------------------------
    imap=imap,
    consensus_reduce=True,            # author's note: make 1 sequence for each key in imap
    minmap=dict.fromkeys(imap, 0.1),  # the same 0.1 threshold for every imap label
    # --- windows --------------------------------------------------------
    scaffold_idxs=range(40246),       # scaffold row indices 0..40245
    window_size=10**6,
    slide_size=10**6,                 # slide == window, so windows do not overlap
    # --- filters --------------------------------------------------------
    minsnps=100,                      # author's note: a window needs at least this much diversity
    # Author's example: mincov=0.5 requires 50% of samples to have a site that
    # is not N or - for the site to be kept. NOTE(review): an integer is passed
    # here, presumably a minimum sample count rather than a fraction - confirm.
    mincov=4,
    rmincov=0,
    # --- tree inference -------------------------------------------------
    inference_method="raxml",         # author's note: options are raxml and mrbayes
    inference_args={"N": 1, "T": 24}, # author's note: number of bootstraps and number of cores
)

In [46]:
# Run treeslider / RAxML on the 1 Mb Beet windows.
# Author's notes: this can be slow (hours); a leftover temp directory from an
# earlier run prevents it from starting; auto=True is meant for a session with
# 24 engines available.
ts4.run(force=True, auto=True)

building database: nwindows=406; minsnps=100
[####################] 100% 0:01:48 | inferring trees 
tree_table written to /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Beet_1MB.tree_table.csv


In [47]:
# Last 10 windows of the Beet 1 Mb tree table
ts4.tree_table.iloc[-10:]

Unnamed: 0,scaffold,start,end,sites,snps,samples,missing,tree
396,186,35000000,36000000,1692,86,39,0.42,
397,186,36000000,37000000,1841,64,39,0.38,
398,186,37000000,38000000,1604,67,39,0.43,
399,186,38000000,39000000,2232,51,39,0.4,
400,186,39000000,40000000,2290,116,39,0.31,"(beet:0.101757,acanthochiton:1e-06,((tuberculatus:0.00712861,(floridanus:0.00432737,(greggii:0.00452659,arenicola:0.00535017)0:0.00164194)0:0.00144626)0:0.00285944,(pumilus:0.00471155,((cannabinus:0.0033566,australis:0.00559742)0:0.00366401,((((c..."
401,186,40000000,41000000,2528,109,39,0.42,"(acanthochiton:0.00204682,(greggii:0.00191318,(floridanus:0.00394392,arenicola:0.00237916)0:0.000528146)0:0.000921276,((pumilus:0.001749,tuberculatus:0.00773095)0:0.000771651,((cannabinus:0.00869436,(beet:0.0870944,australis:0.00242378)0:0.016527..."
402,186,41000000,42000000,1947,91,39,0.34,
403,186,42000000,43000000,2781,153,39,0.33,"(greggii:0.00155576,acanthochiton:0.00445366,(arenicola:0.00315252,((tuberculatus:0.00774119,(beet:0.128977,floridanus:1e-06)0:0.00230381)0:0.00613234,((pumilus:0.00483112,(australis:0.00183148,cannabinus:0.00205693)0:0.00497998)0:0.00136299,(((p..."
404,186,43000000,44000000,1718,82,39,0.28,
405,186,44000000,45000000,651,31,39,0.42,


Check out the Treeslider results

In [48]:
# Summary statistics over all Beet 1 Mb windows (sites, snps, samples, missing)
ts4.tree_table.describe()

Unnamed: 0,scaffold,start,end,sites,snps,samples,missing
count,406.0,406.0,406.0,406.0,406.0,406.0,406.0
mean,96.768,19550000.0,20550000.0,868.485,36.756,37.559,0.42
std,61.243,14490000.0,14490000.0,731.172,35.989,7.366,0.17
min,0.0,0.0,1000000.0,0.0,0.0,0.0,0.0
25%,42.0,7000000.0,8000000.0,293.75,9.0,39.0,0.33
50%,95.0,18000000.0,19000000.0,684.0,27.0,39.0,0.415
75%,144.0,30000000.0,31000000.0,1209.25,53.75,39.0,0.51
max,186.0,59000000.0,60000000.0,4236.0,221.0,39.0,0.87


In [49]:
# Summarise the Beet 1 Mb window trees with Astral, using the in-memory tree
# table produced by ts4.
Ast4 = ipa.astral(
    name='Astral_Beet_1MB',
    workdir=OUTDIR,
    data=ts4.tree_table,
    annotation=1,
)

# show the java command that Ast4.run() will execute
Ast4.print_command()

java -jar /rigel/home/slh2181/miniconda3/bin/astral.5.7.1.jar -i /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/tmptrees.txt -o /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Beet_1MB.tre -t 1


In [50]:
# Run Astral; the inferred tree is then read back from Ast4.tree by the drawing cell.
Ast4.run()

[astral.5.7.1.jar]
inferred tree written to (/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Beet_1MB.tre)


In [51]:
# Root the Beet 1 Mb Astral tree on the beet outgroup.
# NOTE(review): rooted1 was also used for the Quinoa 1 Mb tree and is rebound here.
rooted1 = Ast4.tree.root("beet")
our_labels = rooted1.get_tip_labels()

# Tip labels are reduced to the text before the first "_" and "-" and then
# looked up in color_dict to get one colour per tip.
sp_names = [label.partition("_")[0].partition("-")[0] for label in our_labels]
colors = list(map(color_dict.__getitem__, sp_names))

# Give the root a support of 100, then turn every node's support into an int
# so the node labels are drawn as whole numbers.
rooted1.treenode.support = 100
for node in rooted1.treenode.traverse():
    node.support = int(float(node.support))

canvas, axes, mark = rooted1.draw(
    width=600,
    height=800,
    use_edge_lengths=False,
    tip_labels_align=True,
    tip_labels_colors=colors,
    tip_labels_style={"font-size": "16px"},
    node_labels="support",
    node_labels_style={"font-size": "15px"},
    node_sizes=27,
    node_style={"fill": "white", "stroke": "black"},
);

In [52]:
# Save the Beet 1 Mb tree drawing as an SVG file.
toyplot.svg.render(
    canvas,
    "/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/"
    "Outgroup_reference/Astral_Beet_1MB.svg",
)

### 500kb windows

In [36]:
# Set up treeslider to infer one RAxML tree per non-overlapping 500 kb window
# ("gene") of the Beet-aligned data.
# NOTE(review): unlike the 1 Mb run, no inference_args are passed here, so
# treeslider's own defaults apply - confirm that this is intended.
ts5 = ipa.treeslider(
    # --- input / output -------------------------------------------------
    name='Beet_500kb',                # author's note: this name can't end in _2
    workdir=OUTDIR,
    data=dataB,
    # --- population handling --------------------------------------------
    imap=imap,
    consensus_reduce=True,            # author's note: make 1 sequence for each key in imap
    minmap=dict.fromkeys(imap, 0.1),  # the same 0.1 threshold for every imap label
    # --- windows --------------------------------------------------------
    scaffold_idxs=range(40246),
    window_size=5 * 10**5,
    slide_size=5 * 10**5,             # slide == window, so windows do not overlap
    # --- filters --------------------------------------------------------
    minsnps=50,                       # author's note: a window needs at least this much diversity
    mincov=4,                         # NOTE(review): integer, presumably a sample count - confirm
    rmincov=0,
    # --- tree inference -------------------------------------------------
    inference_method="raxml",         # author's note: options are raxml and mrbayes
)

In [37]:
# Run treeslider / RAxML on the 500 kb Beet windows (overwrites earlier results).
ts5.run(auto=True, force=True)

building database: nwindows=885; minsnps=50
[####################] 100% 0:18:34 | inferring trees 
tree_table written to /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Beet_500kb.tree_table.csv


In [38]:
# Summarise the Beet 500 kb window trees with Astral, using the in-memory
# tree table produced by ts5.
Ast500 = ipa.astral(
    name='Astral_Beet_500kB',
    workdir=OUTDIR,
    data=ts5.tree_table,
    annotation=1,
)

# show the java command, then execute it
Ast500.print_command()
Ast500.run()

java -jar /rigel/home/slh2181/miniconda3/bin/astral.5.7.1.jar -i /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/tmptrees.txt -o /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Beet_500kB.tre -t 1
[astral.5.7.1.jar]
inferred tree written to (/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Beet_500kB.tre)


In [39]:
# Root the Beet 500 kb Astral tree on the beet outgroup.
# NOTE(review): rooted500 was also used for the Quinoa 500 kb tree and is rebound here.
rooted500 = toytree.tree(Ast500.tree).root("beet")

# One colour per tip: keep the text before the first "_" and "-" of each tip
# label and look it up in color_dict.
our_labels = rooted500.get_tip_labels()
sp_names = [label.partition("_")[0].partition("-")[0] for label in our_labels]
colors = [color_dict[name] for name in sp_names]

# Root support is set to 100; all supports are then cast to int for display.
rooted500.treenode.support = 100
for node in rooted500.treenode.traverse():
    node.support = int(float(node.support))

canvas, axes, mark = rooted500.draw(
    width=600,
    height=800,
    use_edge_lengths=False,
    tip_labels_align=True,
    tip_labels_colors=colors,
    tip_labels_style={"font-size": "16px"},
    node_labels="support",
    node_labels_style={"font-size": "15px"},
    node_sizes=27,
    node_style={"fill": "white", "stroke": "black"},
);

In [40]:
# Save the Beet 500 kb tree drawing as an SVG file.
toyplot.svg.render(
    canvas,
    "/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/"
    "Outgroup_reference/Astral_Beet_500kB.svg",
)

### 250kb windows

In [41]:
# Set up treeslider to infer one RAxML tree per non-overlapping 250 kb window
# ("gene") of the Beet-aligned data.
# NOTE(review): unlike the 1 Mb run, no inference_args are passed here, so
# treeslider's own defaults apply - confirm that this is intended.
ts6 = ipa.treeslider(
    # --- input / output -------------------------------------------------
    name='Beet_250kb',                # author's note: this name can't end in _2
    workdir=OUTDIR,
    data=dataB,
    # --- population handling --------------------------------------------
    imap=imap,
    consensus_reduce=True,            # author's note: make 1 sequence for each key in imap
    minmap=dict.fromkeys(imap, 0.1),  # the same 0.1 threshold for every imap label
    # --- windows --------------------------------------------------------
    scaffold_idxs=range(40246),
    window_size=25 * 10**4,
    slide_size=25 * 10**4,            # slide == window, so windows do not overlap
    # --- filters --------------------------------------------------------
    minsnps=25,                       # author's note: a window needs at least this much diversity
    mincov=4,                         # NOTE(review): integer, presumably a sample count - confirm
    rmincov=0,
    # --- tree inference -------------------------------------------------
    inference_method="raxml",         # author's note: options are raxml and mrbayes
)

In [42]:
# Run treeslider / RAxML on the 250 kb Beet windows (overwrites earlier results).
ts6.run(auto=True, force=True)

building database: nwindows=1904; minsnps=25
[####################] 100% 0:23:02 | inferring trees 
tree_table written to /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Beet_250kb.tree_table.csv


In [43]:
# Summarise the Beet 250 kb window trees with Astral, using the in-memory
# tree table produced by ts6.
# NOTE(review): Ast250 was also used for the Quinoa 250 kb result and is rebound here.
Ast250 = ipa.astral(
    name='Astral_Beet_250kB',
    workdir=OUTDIR,
    data=ts6.tree_table,
    annotation=1,
)

# show the java command, then execute it
Ast250.print_command()
Ast250.run()

java -jar /rigel/home/slh2181/miniconda3/bin/astral.5.7.1.jar -i /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/tmptrees.txt -o /rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Beet_250kB.tre -t 1
[astral.5.7.1.jar]
inferred tree written to (/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/Outgroup_reference/Astral_Beet_250kB.tre)


In [44]:
# Root the Beet 250 kb Astral tree on the beet outgroup.
# NOTE(review): rooted250 was also used for the Quinoa 250 kb tree and is rebound here.
rooted250 = toytree.tree(Ast250.tree).root("beet")

# One colour per tip: keep the text before the first "_" and "-" of each tip
# label and look it up in color_dict.
our_labels = rooted250.get_tip_labels()
sp_names = [label.partition("_")[0].partition("-")[0] for label in our_labels]
colors = [color_dict[name] for name in sp_names]

# Root support is set to 100; all supports are then cast to int for display.
rooted250.treenode.support = 100
for node in rooted250.treenode.traverse():
    node.support = int(float(node.support))

canvas, axes, mark = rooted250.draw(
    width=600,
    height=800,
    use_edge_lengths=False,
    tip_labels_align=True,
    tip_labels_colors=colors,
    tip_labels_style={"font-size": "16px"},
    node_labels="support",
    node_labels_style={"font-size": "15px"},
    node_sizes=27,
    node_style={"fill": "white", "stroke": "black"},
);

In [45]:
# Save the Beet 250 kb tree drawing as an SVG file.
toyplot.svg.render(
    canvas,
    "/rigel/dsi/users/slh2181/tuberculatus_plate/asplundii/hybridus_redo/"
    "Outgroup_reference/Astral_Beet_250kB.svg",
)