# Make a maximum likelihood tree with 3RAD data for all Taiwan and nearby Chinese samples
### Both individual samples and consensus sequences

Last run: March 4, 2021

In [1]:
# Dependencies for the whole notebook: ipyrad.analysis (ipa) provides the
# window_extracter and raxml tools used below; toytree loads, roots and draws trees.
# NOTE(review): `ip` and `pd` are not referenced in the cells visible here.
import ipyrad as ip
import ipyrad.analysis as ipa
import pandas as pd
import toytree

# Record the ipyrad version next to the results.
print('ipyrad', ipa.__version__)

ipyrad 0.9.63


## Use `window_extracter` to build the alignment for the tree

In [2]:
# Input: the ipyrad sequence database (.seqs.hdf5) passed as `data` to window_extracter.
data = "/rigel/dsi/users/slh2181/Pmontana/kudzu91_noHC_min20_100Loci_outfiles/kudzu91_noHC_min20_100Loci.seqs.hdf5"
# Output directory, passed as `workdir` to window_extracter.
OUTDIR = "/rigel/dsi/users/slh2181/Pmontana/Analysis/Taiwan"

In [3]:
# Group individual samples into populations: {population code: [sample names]}.
# Only the uncommented populations (KFU7, KGD3, KGD4, KGD5, KGU1, KHN5, KHN7,
# KHN9, KHN10, KHN11, KTW1, KTW4, KTW6) are part of this analysis. Commented-out
# entries are populations left out of this run; sample names commented out at the
# end of an active line are presumably individuals dropped from that population.
imap = { 
#"AL12": ["AL12-10KA","AL12-10KA","AL12-12KA","AL12-14KA","AL12-16KA","AL12-18KA","AL12-19KA","AL12-1KA","AL12-5KA","AL12-6KA"],
#"AL5": ["AL5-11KA","AL5-14KA","AL5-15KA","AL5-16KA","AL5-18KA","AL5-19KA","AL5-1KA","AL5-21KA","AL5-2KA","AL5-2KA","AL5-5KA"],
#"AR2": ["AR2-10KA","AR2-18KA","AR2-1KA","AR2-22KA","AR2-2KA","AR2-2KA","AR2-3KA"],
#"FL3": ["FL3-17K","FL3-1KA","FL3-4KA","FL3-7KA","FL3-8K"],
#"GA3": ["GA3-10K","GA3-11K","GA3-13K","GA3-17K","GA3-21K","GA3-23K","GA3-24K"],
#"GA34": ["GA34-11A","GA34-12A","GA34-13A","GA34-19A","GA34-22A","GA34-23A","GA34-2A","GA34-6A","GA34-7A","GA34-9A"],
#"GA36": ["GA36-10KA","GA36-13KA","GA36-14KA","GA36-21KA","GA36-3KA","GA36-6KA","GA36-6KA"],
#"GA3": ["GA3-7K"],
#"GA96": ["GA96-10KA","GA96-10KA","GA96-11KA","GA96-12KA","GA96-14KA","GA96-16KA","GA96-20KA","GA96-23KA","GA96-2KA","GA96-3KA","GA96-4KA","GA96-8KA","GA96-9KA"],
#"KY11": ["KY11-11KA","KY11-11KA","KY11-12KA","KY11-13KA","KY11-14KA","KY11-16KA","KY11-17KA","KY11-20KA","KY11-4KA","KY11-7KA","KY11-8KA"],
#"KY7": ["KY7-12KC","KY7-14KC","KY7-17KC","KY7-20KC","KY7-20KC","KY7-8KC"],
#"KY8": ["KY8-12KB","KY8-13KB","KY8-14KB","KY8-17KB","KY8-1KB","KY8-22KC","KY8-23KC","KY8-24KC","KY8-7KB","KY8-8KB"],
#"NY4": ["NY4-18K","NY4-1K","NY4-1K","NY4-23K","NY4-4K","NY4-9K"],
#"OK1": ["OK1-10KA","OK1-15KA","OK1-20KA","OK1-24K","OK1-24K","OK1-5KA"],
#"SC4": ["SC4-11K","SC4-11K","SC4-15KB","SC4-17KB","SC4-18KB","SC4-20KB","SC4-22K","SC4-2KA","SC4-3K","SC4-4K","SC4-5K","SC4-9K"],
#"TN6": ["TN6-11K","TN6-15KA","TN6-1K","TN6-21K","TN6-24K","TN6-2K","TN6-9K"],
#"TN8": ["TN8-10KA","TN8-10KA","TN8-11KA","TN8-13KA","TN8-14KA","TN8-15KA","TN8-20KA","TN8-23KA","TN8-24KA","TN8-27KA","TN8-3KA","TN8-9KA"],
#"TX4": ["TX4-10KA","TX4-11KA","TX4-13KA","TX4-14KA","TX4-19KA","TX4-1KA","TX4-20KA","TX4-21A","TX4-21A","TX4-21KA","TX4-2KA","TX4-3KA","TX4-7KA"],
#"TX5": ["TX5-10KA","TX5-10KA","TX5-11KA","TX5-14KA","TX5-15KA","TX5-17KA","TX5-1KA","TX5-24KA","TX5-25KA","TX5-7KA","TX5-8KA"],
#"WV1": ["WV1-12KA","WV1-19KAC1","WV1-20KA","WV1-20KA","WV1-21KAC","WV1-22K","WV1-23KAC","WV1-24KAC","WV1-7KAC1","WV1-8KAC1"],
#"MD1": ["MD1-10K","MD1-11K","MD1-12K","MD1-18K","MD1-19K","MD1-1K","MD1-22K","MD1-24K","MD1-4K","MD1-9K"],
#"MO1": ["MO1-14KA","MO1-1KA","MO-18KA"],
#"MS4": ["MS4-12KA","MS4-12KA","MS4-16KA","MS4-17KA","MS4-1KA","MS4-20KA","MS4-23KA","MS4-24KA","MS4-5KA","MS4-6KA","MS4-7KA"],
#"MS6": ["MS6-11KA","MS6-15KA","MS6-21KA","MS6-21KA","MS6-3KB","MS6-4KA","MS6-5KA","MS6-7KB","MS6-8KB"],
#"NC13": ["NC13-15KA","NC13-15KA","NC13-16KA","NC13-19KA","NC13-1KA","NC13-21KA","NC13-24KA","NC13-2KA","NC13-6KA","NC13-8KA"],
#"NC20": ["NC20-14KA","NC20-14KA","NC20-15KA","NC20-17KA","NC20-18KA","NC20-1KA","NC20-25KA","NC20-26KAMS","NC20-28KAMS","NC20-34KAMS","NC20-3KA","NC20-7KA"],
#"NC21": ["NC21-10K","NC21-10K","NC21-11K","NC21-1K","NC21-8K","NC21-9K"],
#"NC3": ["NC3-14K","NC3-14K","NC3-15K","NC3-16K","NC3-17K","NC3-19K","NC3-22K","NC3-24K","NC3-4K","NC3-5K","NC3-6K","NC3-9K"],
#"NC6": ["NC6-11KA","NC6-11KA","NC6-15KA","NC6-19KA","NC6-1KA","NC6-24KA","NC6-25KA","NC6-2KA","NC6-5KA","NC6-6KA","NC6-7KA","NC6-9K"],
#"NE1": ["NE1-1A","NE1-2A","NE1-3A","NE1-4A","NE1-5A"],
#"KJP11": ["KJP11-10A","KJP11-10A","KJP11-13A","KJP11-16A","KJP11-18A","KJP11-20A","KJP11-21A","KJP11-22A","KJP11-3A","KJP11-5A","KJP11-7A"],
#"KJP14": ["KJP14-11A","KJP14-11A","KJP14-12A","KJP14-14A","KJP14-15A","KJP14-17A","KJP14-19A","KJP14-1A","KJP14-20A","KJP14-3A","KJP14-8A"],
#"KJP17": ["KJP17-11A","KJP17-12A","KJP17-19A","KJP17-1A","KJP17-23A"],
#"KJP18": ["KJP18-10A","KJP18-10A","KJP18-11A","KJP18-13A","KJP18-17A","KJP18-1A","KJP18-21A","KJP18-24A","KJP18-25","KJP18-4A","KJP18-9A"],
#"KJP2": ["KJP2-11A","KJP2-16A","KJP2-17A","KJP2-18A","KJP2-1A","KJP2-22A","KJP2-23A","KJP22-5A","KJP2-25A","KJP22-9A","KJP2-2A"],
#"KJP22": ["KJP22-10A","KJP22-14A","KJP22-17A","KJP22-18A","KJP22-19A","KJP22-21A","KJP22-22A","KJP22-23A","KJP2-22A","KJP2-23A","KJP22-5A","KJP2-25A","KJP22-9A"],
#"KJP23": ["KJP23-10A","KJP23-10A","KJP23-12A","KJP23-13A","KJP23-18A","KJP23-19A","KJP23-1A","KJP23-3A","KJP23-3A","KJP23-4A","KJP23-5A","KJP23-7A"],
#"KJP25": ["KJP25-10A","KJP25-12A","KJP25-15A","KJP25-16A","KJP25-19A","KJP25-22A","KJP25-22A","KJP25-23A","KJP25-25A","KJP25-4A","KJP25-6A"],
#"KJP27": ["KJP27-11A","KJP27-14A","KJP27-18A","KJP27-1A","KJP27-22A","KJP27-24A","KJP27-6A","KJP27-8A","KJP27-9A"],
#"KJP3": ["KJP3-11A","KJP3-11A","KJP3-12A","KJP3-13A","KJP3-16A","KJP3-17A","KJP3-20A","KJP3-21A","KJP3-22A","KJP3-2A","KJP3-3A"],
#"KJP5": ["KJP5-14A","KJP5-15A","KJP5-19A","KJP5-1A","KJP5-23A","KJP5-4","KJP5-5","KJP5-6","KJP5-7A","KJP5-9A"],
#"KJP8": ["KJP8-11A","KJP8-13A","KJP8-17A","KJP8-18A","KJP8-1A","KJP8-20A","KJP8-23A","KJP8-5A","KJP8-7A","KJP8-8A"],
#"KKO19": ["KKO19-11","KKO19-11A","KKO19-12","KKO19-13","KKO19-18","KKO19-1A","KKO19-21","KKO19-22","KKO19-23","KKO19-4A","KKO19-9A"],
#"KKO22": ["KKO22-10","KKO22-10B","KKO22-11","KKO22-12","KKO22-16","KKO22-19","KKO22-1A","KKO22-23","KKO22-24","KKO22-4B","KKO22-6B"],
#"KKO3": ["KKO3-10A","KKO3-10A","KKO3-11A","KKO3-12A","KKO3-14A","KKO3-15A","KKO3-19A","KKO3-1A","KKO3-20A","KKO3-21A","KKO3-3A"],
#"KKO5": ["KKO5-10A","KKO5-10A","KKO5-13A","KKO5-14A","KKO5-18A","KKO5-19A","KKO5-21A","KKO-5-22A","KKO5-24A","KKO5-2A","KKO5-3A"],
#"KKO6": ["KKO6-10","KKO6-10","KKO6-13","KKO6-16","KKO6-2","KKO6-23","KKO6-4","KKO6-6","KKO6-7","KKO6-8","KKO6-9"],
#"KLN1": ["KLN1-13KB","KLN1-18KB","KLN1-1KB"],
#"KLN2": ["KLN2-1A","KLN2-1A","KLN2-2A","KLN2-3A","KLN2-4A","KLN2-5A"],
#"KSA1": ["KSA1-10A","KSA1-10A","KSA1-11","KSA1-12","KSA1-14","KSA1-18","KSA1-1A","KSA1-2","KSA1-21","KSA1-22","KSA1-24"],
#"KSA2": ["KSA2-10A","KSA2-10A","KSA2-12","KSA2-13","KSA2-14","KSA2-15","KSA2-16","KSA2-19","KSA2-20","KSA2-22","KSA2-27"],
#"KSA3": ["KSA3-11A","KSA3-12A","KSA3-13A","KSA3-14A","KSA3-15A","KSA3-19A","KSA3-1A","KSA3-3A","KSA3-5A","KSA3-8A"],
#"KSC1": ["KSC1-11A","KSC1-14A","KSC1-19A","KSC1-20A","KSC1-21A","KSC1-22A","KSC1-24A","KSC1-6A","KSC1-7A","KSC1-8A"],
#"KSC4": ["KSC4-10KB","KSC4-12KB","KSC4-16KB","KSC4-1KB","KSC4-20KB","KSC4-21KB","KSC4-5KB","KSC4-6KB","KSC4-7KB","KSC4-8KB"],
#"KSD1": ["KSD1-10","KSD1-10","KSD1-12","KSD1-13","KSD1-14","KSD1-15","KSD1-19","KSD1-20","KSD1-23","KSD1-24","KSD1-25"],
#"KSD2": ["KSD2-10A","KSD2-11A","KSD2-12A","KSD2-13A"],
#"KYU2": ["KYU2-10A","KYU2-16A","KYU2-18A","KYU2-19A","KYU2-21A","KYU2-22A","KYU2-3A","KYU2-5A","KYU2-6A","KYU2-9A"],
#"KYU7": ["KYU7-10A","KYU7-13A","KYU7-14A","KYU7-17A","KYU7-17A","KYU7-18A","KYU7-19A","KYU7-1A","KYU7-21A","KYU7-2A","KYU7-6A"],
#"KZH10": ["KZH10-10A","KZH10-10A","KZH10-12A","KZH10-12A","KZH10-14A","KZH10-16A","KZH10-18A","KZH10-19A","KZH10-23A","KZH10-4A","KZH10-6A","KZH10-8A"],
#"KZH8": ["KZH8-11A","KZH8-11A","KZH8-13A","KZH8-14A","KZH8-16A","KZH8-18A","KZH8-19A","KZH8-20A","KZH8-21A","KZH8-22A","KZH8-8A"],
#"KAN1": ["KAN1-10A","KAN1-11","KAN1-15A","KAN1-20","KAN1-3"],
#"KAN4": ["KAN4-14A","KAN4-20A","KAN4-23A"],
#"KAN5": ["KAN5-1A","KAN5-1A","KAN5-2A","KAN5-3A","KAN5-4A","KAN5-5A"],
#"KB1": ["KB1-1","KB1-1","KB1-10","KB1-13","KB1-14","KB1-4"],
"KFU7": ["KFU7-10K","KFU7-11K","KFU7-12K","KFU7-16K","KFU7-21K","KFU7-25K","KFU7-2KA","KFU7-5A","KFU7-7KA","KFU7-8KA"],
"KGD3": ["KGD3-12A","KGD3-13A","KGD3-16A","KGD3-17A","KGD3-18A","KGD3-19A","KGD3-20A","KGD3-23A","KGD3-2A","KGD3-3A"],#"KGD3-5A"],
"KGD4": ["KGD4-10","KGD4-11A","KGD4-19","KGD4-1A","KGD4-24"], #"KGD4-13A","KGD4-14A","KGD4-23","KGD4-2A","KGD4-7A"
"KGD5": ["KGD5-6A","KGD5-7A"],
"KGU1": ["KGU1-10A","KGU1-11A","KGU1-12A","KGU1-13A","KGU1-1A","KGU1-20A","KGU1-22A","KGU1-3A","KGU1-6A","KGU1-7A","KGU1-9A"],
"KHN10": ["KHN10-10A","KHN10-13A","KHN10-15A","KHN10-17A","KHN10-18A","KHN10-1A","KHN10-22A","KHN10-23A","KHN10-24A"],#"KHN10-7A"],
"KHN11": ["KHN11-15A","KHN11-1A","KHN11-8A"],#"KHN11-6A",
"KHN5": ["KHN5-10","KHN5-11A","KHN5-12A","KHN5-13A","KHN5-15A","KHN5-16A","KHN5-17A","KHN5-1A","KHN5-22A","KHN5-24A"],
"KHN7": ["KHN7-10A","KHN7-11A","KHN7-16A","KHN7-17A","KHN7-19A","KHN7-22A","KHN7-2A","KHN7-5A","KHN7-6A","KHN7-7A"],
"KHN9": ["KHN9-10A","KHN9-12A","KHN9-13A","KHN9-14A","KHN9-17A","KHN9-20A","KHN9-21A","KHN9-22A","KHN9-24A","KHN9-7A"],
#"KHU3": ["KHU-16A","KHU3-11A","KHU3-11A","KHU3-12A","KHU3-13A","KHU3-14A","KHU3-15A","KHU3-17A","KHU3-19A","KHU3-1A","KHU3-2A"],
#"KHU2": ["KHU2-10A","KHU2-10A","KHU2-11A","KHU2-14A","KHU2-15A","KHU2-16A","KHU2-22A","KHU2-2A","KHU2-4A","KHU2-6A","KHU2-9A"],
#"KJI2": ["KJI2-11A","KJI2-11A","KJI2-13A","KJI2-14A","KJI2-15A","KJI2-16A","KJI2-18A","KJI2-24A","KJI2-25A","KJI2-6A","KJI2-7A"],
#"KJI5": ["KJI5-10A","KJI5-10A","KJI5-11A","KJI5-18A","KJI5-24A","KJI5-2A"],
#"KJIX2": ["KJIX2-12A","KJIX2-12A","KJIX2-15A","KJIX2-17A","KJIX2-1A","KJIX2-23A","KJIX2-24A","KJIX2-25A","KJIX2-26A","KJIX2-28A","KJIX2-3A"],
"KTW1": ["KTW1-11A","KTW1-14A","KTW1-15A","KTW1-1A","KTW1-22A","KTW1-23A","KTW1-3A","KTW1-4A","KTW1-7A","KTW1-9A"],
"KTW4": ["KTW4-12B","KTW4-13B","KTW4-15B","KTW4-17B","KTW4-18B","KTW4-19B","KTW4-20B","KTW4-21B","KTW4-22B","KTW4-24B"],
"KTW6": ["KTW6-12A","KTW6-15A","KTW6-17A","KTW6-18A","KTW6-1A","KTW6-23A","KTW6-24A","KTW6-2A","KTW6-5A","KTW6-7A"],
#"NI1": ["NI1-8K","NI2-14K"],
#"soybean":["soybean"] # If included, this creates a sample called soybean that is entirely missing data, and raxml quits.
}

In [4]:
# Extract one concatenated alignment across scaffold indices 0..16335, keeping
# every individual sample listed in imap (consensus_reduce=False).
# NOTE(review): 16336 is presumably the number of scaffolds/loci in the database — confirm.
# NOTE(review): an earlier comment described minmap as "half" of each population,
# but the value actually used is 0.25 per population — confirm which was intended.
extracter_settings = dict(
    data=data,
    name="TW.SCh_denovo3",
    workdir=OUTDIR,
    scaffold_idxs=range(16336),
    mincov=3,
    imap=imap,
    minmap={pop: 0.25 for pop in imap},
    consensus_reduce=False,
)
T_denovo = ipa.window_extracter(**extracter_settings)

# write the alignment to file
T_denovo.run(force=True)

# display summary stats for the extracted alignment
T_denovo.stats

Wrote data to /rigel/dsi/users/slh2181/Pmontana/Analysis/Taiwan/TW.SCh_denovo3.phy


Unnamed: 0,scaffold,start,end,sites,snps,missing,samples
0,concatenated,0,161939,161939,6985,0.112,110


In [5]:
# Init the raxml object on the alignment written by window_extracter.
# T, N and m are forwarded to the raxml command line as -T, -N and -m
# (see the printed command below). Results go to OUTDIR, the same directory
# as the alignment, instead of repeating the literal path.
rax = ipa.raxml(
    data=T_denovo.outfile,
    T=24,
    N=100,
    m="GTRGAMMA",
    name="TaiwanCh-denovo3",
    workdir=OUTDIR,
)

# print the raxml command string for posterity
print(rax.command)

# run the command (options: block until finished; overwrite existing)
rax.run(block=True, force=True)

/rigel/home/slh2181/miniconda3/bin/raxmlHPC-PTHREADS-AVX2 -f a -T 24 -m GTRGAMMA -n TaiwanCh-denovo3 -w /rigel/dsi/users/slh2181/Pmontana/Analysis/Taiwan -s /rigel/dsi/users/slh2181/Pmontana/Analysis/Taiwan/TW.SCh_denovo3.phy -p 54321 -N 100 -x 12345
job TaiwanCh-denovo3 finished successfully


In [6]:
# Load the RAxML bipartitions tree (the tree carrying the support values drawn below).
tre = toytree.tree(rax.trees.bipartitions)
# Alternative: load the tree from disk instead of from the `rax` object.
# NOTE(review): the run above is named "TaiwanCh-denovo3", so the file on disk is
# presumably RAxML_bipartitions.TaiwanCh-denovo3 — confirm before uncommenting.
#tre = toytree.tree(OUTDIR + "/RAxML_bipartitions.TaiwanCh-denovo3")

# Root on every individual from the outgroup populations. The tip names are
# taken from imap rather than typed out, so the outgroup stays in sync with
# the samples that were actually extracted.
outgroup_pops = ["KHN10", "KHN9", "KHN5", "KGD5", "KHN11"]
outgroup_tips = [sample for pop in outgroup_pops for sample in imap[pop]]

# Fail early with a clear message if imap and the tree have drifted apart.
missing_tips = sorted(set(outgroup_tips) - set(tre.get_tip_labels()))
if missing_tips:
    raise ValueError("outgroup samples not found in tree: {}".format(missing_tips))

# Collapse nodes with support below 50 after rooting.
rtre = tre.root(outgroup_tips).collapse_nodes(min_support=50)

# Ordered (population code, color) pairs. The first code that occurs as a
# substring of a tip label decides its color; tips matching no code
# (e.g. the KHN11 samples) fall back to "mediumvioletred".
tip_palette = [
    ("KTW1", "navy"),
    ("KTW4", "cornflowerblue"),
    ("KTW6", "deepskyblue"),
    ("KFU7", "brown"),
    ("KGD3", "darkorange"),
    ("KGD4", "gold"),
    ("KGD5", "sandybrown"),
    ("KGU1", "purple"),
    ("KHN5", "red"),
    ("KHN9", "deeppink"),
    ("KHN7", "magenta"),
    ("KHN10", "violet"),
]
colorlist = [
    next((shade for code, shade in tip_palette if code in tip), "mediumvioletred")
    for tip in rtre.get_tip_labels()
]

# Draw the rooted tree with support values on the nodes and tips colored by population.
# Optional toggles left disabled: use_edge_lengths=False, tip_labels_align=True.
node_label_css = {"font-size": "11.5px"}
node_marker_css = {"fill": "white", "stroke": "black"}
tip_label_css = {"font-size": "15px"}

canvas, axes, mark = rtre.draw(
    node_labels="support",
    node_labels_style=node_label_css,
    node_sizes=21,
    node_style=node_marker_css,
    tip_labels_colors=colorlist,
    tip_labels_style=tip_label_css,
    width=600,
    height=1500,
);

In [9]:
import os
import toyplot.svg

# Save the canvas drawn above as SVG inside OUTDIR (same destination as before,
# built from OUTDIR instead of repeating the literal directory).
toyplot.svg.render(canvas, os.path.join(OUTDIR, "Taiwan.fullChina_denovo3.svg"))

In [7]:
# Newick string of the rooted tree after collapsing nodes with support below 50.
rtre.newick

'((((KHN10-22A:0.000327898,KHN10-1A:0.000312044)95:0.000400213,(KHN10-13A:2.3905e-05,(KHN10-17A:1e-06,KHN10-18A:1e-06)80:2.0116e-05)100:0.000701892,((KHN10-23A:3.85273e-05,KHN10-15A:1e-06)56:3.35257e-05,KHN10-24A:1e-06,KHN10-10A:1e-06)100:0.000488231)98:0.000801545,((KHN9-20A:0.000167864,(KHN9-21A:3.80995e-05,KHN9-22A:0.000102405)68:5.36845e-05)99:0.000799244,(KHN9-17A:4.64429e-05,(KHN9-14A:4.60261e-05,KHN9-10A:2.82721e-05)98:7.26187e-05)100:0.000770631,(KHN9-7A:8.6724e-05,KHN9-12A:6.81377e-05)100:0.000917129,((KHN5-13A:1.78247e-05,KHN5-24A:6.49976e-05,KHN5-11A:3.83353e-05,KHN5-17A:1.89542e-05,KHN5-1A:1e-06,KHN5-12A:6.04654e-05,KHN5-15A:0.000101781,(KHN5-22A:2.16561e-05,KHN5-16A:3.22853e-05)68:0.000100921)97:0.000787933,KHN5-10:0.000506163,((KHN11-8A:1e-06,(KHN11-15A:0.000130914,KHN11-1A:1e-06)94:1.99394e-05)100:0.00115051,(KGD5-7A:1.14246e-05,KGD5-6A:8.55735e-06)98:0.000804898,(KHN9-13A:3.94559e-05,KHN9-24A:1.77809e-05)100:0.00110375)79:0.000649703)90:0.000593746)81:0.000495372)100:0.

In [8]:
# Newick string of the rooted tree WITHOUT collapsing low-support nodes.
# The outgroup is every individual from the populations below, read from imap
# rather than typed out, so it cannot drift from the extracted sample set.
outgroup_pops = ["KHN10", "KHN9", "KHN5", "KGD5", "KHN11"]
outgroup_tips = [sample for pop in outgroup_pops for sample in imap[pop]]
tre.root(outgroup_tips).newick

'((((KHN10-22A:0.000327898,KHN10-1A:0.000312044)95:0.000400213,((KHN10-13A:2.3905e-05,(KHN10-17A:1e-06,KHN10-18A:1e-06)80:2.0116e-05)100:0.000701892,((KHN10-24A:1e-06,KHN10-10A:1e-06)41:1e-06,(KHN10-23A:3.85273e-05,KHN10-15A:1e-06)56:3.35257e-05)100:0.000488231)31:0.000328462)98:0.000801545,((KHN9-20A:0.000167864,(KHN9-21A:3.80995e-05,KHN9-22A:0.000102405)68:5.36845e-05)99:0.000799244,((KHN9-17A:4.64429e-05,(KHN9-14A:4.60261e-05,KHN9-10A:2.82721e-05)98:7.26187e-05)100:0.000770631,((KHN9-7A:8.6724e-05,KHN9-12A:6.81377e-05)100:0.000917129,((KHN5-10:0.000506163,((KHN11-8A:1e-06,(KHN11-15A:0.000130914,KHN11-1A:1e-06)94:1.99394e-05)100:0.00115051,((KGD5-7A:1.14246e-05,KGD5-6A:8.55735e-06)98:0.000804898,(KHN9-13A:3.94559e-05,KHN9-24A:1.77809e-05)100:0.00110375)8:0.000206464)79:0.000649703)1:0.00013514,((KHN5-13A:1.78247e-05,(KHN5-11A:3.83353e-05,KHN5-17A:1.89542e-05)33:8.05449e-05)9:0.000138454,(KHN5-24A:6.49976e-05,(KHN5-1A:1e-06,(KHN5-12A:6.04654e-05,(KHN5-15A:0.000101781,(KHN5-22A:2.16561

# Now use consensus reduction (one consensus sequence per population) to get a topology for BPP

In [None]:
# NOTE(review): these imports repeat the ones at the top of the notebook;
# presumably kept so this section can be run from a fresh kernel — confirm,
# otherwise this cell is redundant.
import ipyrad as ip
import ipyrad.analysis as ipa
import pandas as pd
import toytree

# Record the ipyrad version next to the results.
print('ipyrad', ipa.__version__)

In [2]:
# Input: the ipyrad sequence database (.seqs.hdf5) passed as `data` to window_extracter.
# These are the same two values as defined earlier in the notebook.
data = "/rigel/dsi/users/slh2181/Pmontana/kudzu91_noHC_min20_100Loci_outfiles/kudzu91_noHC_min20_100Loci.seqs.hdf5"
# Output directory, passed as `workdir` to window_extracter.
OUTDIR = "/rigel/dsi/users/slh2181/Pmontana/Analysis/Taiwan"

In [4]:
# Group individual samples into populations: {population code: [sample names]}.
# Only the uncommented populations (KFU7, KGD3, KGD4, KGD5, KGU1, KHN5, KHN7,
# KHN9, KHN10, KHN11, KTW1, KTW4, KTW6) are part of this analysis. Commented-out
# entries are populations left out of this run; sample names commented out at the
# end of an active line are presumably individuals dropped from that population.
imap = { 
#"AL12": ["AL12-10KA","AL12-10KA","AL12-12KA","AL12-14KA","AL12-16KA","AL12-18KA","AL12-19KA","AL12-1KA","AL12-5KA","AL12-6KA"],
#"AL5": ["AL5-11KA","AL5-14KA","AL5-15KA","AL5-16KA","AL5-18KA","AL5-19KA","AL5-1KA","AL5-21KA","AL5-2KA","AL5-2KA","AL5-5KA"],
#"AR2": ["AR2-10KA","AR2-18KA","AR2-1KA","AR2-22KA","AR2-2KA","AR2-2KA","AR2-3KA"],
#"FL3": ["FL3-17K","FL3-1KA","FL3-4KA","FL3-7KA","FL3-8K"],
#"GA3": ["GA3-10K","GA3-11K","GA3-13K","GA3-17K","GA3-21K","GA3-23K","GA3-24K"],
#"GA34": ["GA34-11A","GA34-12A","GA34-13A","GA34-19A","GA34-22A","GA34-23A","GA34-2A","GA34-6A","GA34-7A","GA34-9A"],
#"GA36": ["GA36-10KA","GA36-13KA","GA36-14KA","GA36-21KA","GA36-3KA","GA36-6KA","GA36-6KA"],
#"GA3": ["GA3-7K"],
#"GA96": ["GA96-10KA","GA96-10KA","GA96-11KA","GA96-12KA","GA96-14KA","GA96-16KA","GA96-20KA","GA96-23KA","GA96-2KA","GA96-3KA","GA96-4KA","GA96-8KA","GA96-9KA"],
#"KY11": ["KY11-11KA","KY11-11KA","KY11-12KA","KY11-13KA","KY11-14KA","KY11-16KA","KY11-17KA","KY11-20KA","KY11-4KA","KY11-7KA","KY11-8KA"],
#"KY7": ["KY7-12KC","KY7-14KC","KY7-17KC","KY7-20KC","KY7-20KC","KY7-8KC"],
#"KY8": ["KY8-12KB","KY8-13KB","KY8-14KB","KY8-17KB","KY8-1KB","KY8-22KC","KY8-23KC","KY8-24KC","KY8-7KB","KY8-8KB"],
#"NY4": ["NY4-18K","NY4-1K","NY4-1K","NY4-23K","NY4-4K","NY4-9K"],
#"OK1": ["OK1-10KA","OK1-15KA","OK1-20KA","OK1-24K","OK1-24K","OK1-5KA"],
#"SC4": ["SC4-11K","SC4-11K","SC4-15KB","SC4-17KB","SC4-18KB","SC4-20KB","SC4-22K","SC4-2KA","SC4-3K","SC4-4K","SC4-5K","SC4-9K"],
#"TN6": ["TN6-11K","TN6-15KA","TN6-1K","TN6-21K","TN6-24K","TN6-2K","TN6-9K"],
#"TN8": ["TN8-10KA","TN8-10KA","TN8-11KA","TN8-13KA","TN8-14KA","TN8-15KA","TN8-20KA","TN8-23KA","TN8-24KA","TN8-27KA","TN8-3KA","TN8-9KA"],
#"TX4": ["TX4-10KA","TX4-11KA","TX4-13KA","TX4-14KA","TX4-19KA","TX4-1KA","TX4-20KA","TX4-21A","TX4-21A","TX4-21KA","TX4-2KA","TX4-3KA","TX4-7KA"],
#"TX5": ["TX5-10KA","TX5-10KA","TX5-11KA","TX5-14KA","TX5-15KA","TX5-17KA","TX5-1KA","TX5-24KA","TX5-25KA","TX5-7KA","TX5-8KA"],
#"WV1": ["WV1-12KA","WV1-19KAC1","WV1-20KA","WV1-20KA","WV1-21KAC","WV1-22K","WV1-23KAC","WV1-24KAC","WV1-7KAC1","WV1-8KAC1"],
#"MD1": ["MD1-10K","MD1-11K","MD1-12K","MD1-18K","MD1-19K","MD1-1K","MD1-22K","MD1-24K","MD1-4K","MD1-9K"],
#"MO1": ["MO1-14KA","MO1-1KA","MO-18KA"],
#"MS4": ["MS4-12KA","MS4-12KA","MS4-16KA","MS4-17KA","MS4-1KA","MS4-20KA","MS4-23KA","MS4-24KA","MS4-5KA","MS4-6KA","MS4-7KA"],
#"MS6": ["MS6-11KA","MS6-15KA","MS6-21KA","MS6-21KA","MS6-3KB","MS6-4KA","MS6-5KA","MS6-7KB","MS6-8KB"],
#"NC13": ["NC13-15KA","NC13-15KA","NC13-16KA","NC13-19KA","NC13-1KA","NC13-21KA","NC13-24KA","NC13-2KA","NC13-6KA","NC13-8KA"],
#"NC20": ["NC20-14KA","NC20-14KA","NC20-15KA","NC20-17KA","NC20-18KA","NC20-1KA","NC20-25KA","NC20-26KAMS","NC20-28KAMS","NC20-34KAMS","NC20-3KA","NC20-7KA"],
#"NC21": ["NC21-10K","NC21-10K","NC21-11K","NC21-1K","NC21-8K","NC21-9K"],
#"NC3": ["NC3-14K","NC3-14K","NC3-15K","NC3-16K","NC3-17K","NC3-19K","NC3-22K","NC3-24K","NC3-4K","NC3-5K","NC3-6K","NC3-9K"],
#"NC6": ["NC6-11KA","NC6-11KA","NC6-15KA","NC6-19KA","NC6-1KA","NC6-24KA","NC6-25KA","NC6-2KA","NC6-5KA","NC6-6KA","NC6-7KA","NC6-9K"],
#"NE1": ["NE1-1A","NE1-2A","NE1-3A","NE1-4A","NE1-5A"],
#"KJP11": ["KJP11-10A","KJP11-10A","KJP11-13A","KJP11-16A","KJP11-18A","KJP11-20A","KJP11-21A","KJP11-22A","KJP11-3A","KJP11-5A","KJP11-7A"],
#"KJP14": ["KJP14-11A","KJP14-11A","KJP14-12A","KJP14-14A","KJP14-15A","KJP14-17A","KJP14-19A","KJP14-1A","KJP14-20A","KJP14-3A","KJP14-8A"],
#"KJP17": ["KJP17-11A","KJP17-12A","KJP17-19A","KJP17-1A","KJP17-23A"],
#"KJP18": ["KJP18-10A","KJP18-10A","KJP18-11A","KJP18-13A","KJP18-17A","KJP18-1A","KJP18-21A","KJP18-24A","KJP18-25","KJP18-4A","KJP18-9A"],
#"KJP2": ["KJP2-11A","KJP2-16A","KJP2-17A","KJP2-18A","KJP2-1A","KJP2-22A","KJP2-23A","KJP22-5A","KJP2-25A","KJP22-9A","KJP2-2A"],
#"KJP22": ["KJP22-10A","KJP22-14A","KJP22-17A","KJP22-18A","KJP22-19A","KJP22-21A","KJP22-22A","KJP22-23A","KJP2-22A","KJP2-23A","KJP22-5A","KJP2-25A","KJP22-9A"],
#"KJP23": ["KJP23-10A","KJP23-10A","KJP23-12A","KJP23-13A","KJP23-18A","KJP23-19A","KJP23-1A","KJP23-3A","KJP23-3A","KJP23-4A","KJP23-5A","KJP23-7A"],
#"KJP25": ["KJP25-10A","KJP25-12A","KJP25-15A","KJP25-16A","KJP25-19A","KJP25-22A","KJP25-22A","KJP25-23A","KJP25-25A","KJP25-4A","KJP25-6A"],
#"KJP27": ["KJP27-11A","KJP27-14A","KJP27-18A","KJP27-1A","KJP27-22A","KJP27-24A","KJP27-6A","KJP27-8A","KJP27-9A"],
#"KJP3": ["KJP3-11A","KJP3-11A","KJP3-12A","KJP3-13A","KJP3-16A","KJP3-17A","KJP3-20A","KJP3-21A","KJP3-22A","KJP3-2A","KJP3-3A"],
#"KJP5": ["KJP5-14A","KJP5-15A","KJP5-19A","KJP5-1A","KJP5-23A","KJP5-4","KJP5-5","KJP5-6","KJP5-7A","KJP5-9A"],
#"KJP8": ["KJP8-11A","KJP8-13A","KJP8-17A","KJP8-18A","KJP8-1A","KJP8-20A","KJP8-23A","KJP8-5A","KJP8-7A","KJP8-8A"],
#"KKO19": ["KKO19-11","KKO19-11A","KKO19-12","KKO19-13","KKO19-18","KKO19-1A","KKO19-21","KKO19-22","KKO19-23","KKO19-4A","KKO19-9A"],
#"KKO22": ["KKO22-10","KKO22-10B","KKO22-11","KKO22-12","KKO22-16","KKO22-19","KKO22-1A","KKO22-23","KKO22-24","KKO22-4B","KKO22-6B"],
#"KKO3": ["KKO3-10A","KKO3-10A","KKO3-11A","KKO3-12A","KKO3-14A","KKO3-15A","KKO3-19A","KKO3-1A","KKO3-20A","KKO3-21A","KKO3-3A"],
#"KKO5": ["KKO5-10A","KKO5-10A","KKO5-13A","KKO5-14A","KKO5-18A","KKO5-19A","KKO5-21A","KKO-5-22A","KKO5-24A","KKO5-2A","KKO5-3A"],
#"KKO6": ["KKO6-10","KKO6-10","KKO6-13","KKO6-16","KKO6-2","KKO6-23","KKO6-4","KKO6-6","KKO6-7","KKO6-8","KKO6-9"],
#"KLN1": ["KLN1-13KB","KLN1-18KB","KLN1-1KB"],
#"KLN2": ["KLN2-1A","KLN2-1A","KLN2-2A","KLN2-3A","KLN2-4A","KLN2-5A"],
#"KSA1": ["KSA1-10A","KSA1-10A","KSA1-11","KSA1-12","KSA1-14","KSA1-18","KSA1-1A","KSA1-2","KSA1-21","KSA1-22","KSA1-24"],
#"KSA2": ["KSA2-10A","KSA2-10A","KSA2-12","KSA2-13","KSA2-14","KSA2-15","KSA2-16","KSA2-19","KSA2-20","KSA2-22","KSA2-27"],
#"KSA3": ["KSA3-11A","KSA3-12A","KSA3-13A","KSA3-14A","KSA3-15A","KSA3-19A","KSA3-1A","KSA3-3A","KSA3-5A","KSA3-8A"],
#"KSC1": ["KSC1-11A","KSC1-14A","KSC1-19A","KSC1-20A","KSC1-21A","KSC1-22A","KSC1-24A","KSC1-6A","KSC1-7A","KSC1-8A"],
#"KSC4": ["KSC4-10KB","KSC4-12KB","KSC4-16KB","KSC4-1KB","KSC4-20KB","KSC4-21KB","KSC4-5KB","KSC4-6KB","KSC4-7KB","KSC4-8KB"],
#"KSD1": ["KSD1-10","KSD1-10","KSD1-12","KSD1-13","KSD1-14","KSD1-15","KSD1-19","KSD1-20","KSD1-23","KSD1-24","KSD1-25"],
#"KSD2": ["KSD2-10A","KSD2-11A","KSD2-12A","KSD2-13A"],
#"KYU2": ["KYU2-10A","KYU2-16A","KYU2-18A","KYU2-19A","KYU2-21A","KYU2-22A","KYU2-3A","KYU2-5A","KYU2-6A","KYU2-9A"],
#"KYU7": ["KYU7-10A","KYU7-13A","KYU7-14A","KYU7-17A","KYU7-17A","KYU7-18A","KYU7-19A","KYU7-1A","KYU7-21A","KYU7-2A","KYU7-6A"],
#"KZH10": ["KZH10-10A","KZH10-10A","KZH10-12A","KZH10-12A","KZH10-14A","KZH10-16A","KZH10-18A","KZH10-19A","KZH10-23A","KZH10-4A","KZH10-6A","KZH10-8A"],
#"KZH8": ["KZH8-11A","KZH8-11A","KZH8-13A","KZH8-14A","KZH8-16A","KZH8-18A","KZH8-19A","KZH8-20A","KZH8-21A","KZH8-22A","KZH8-8A"],
#"KAN1": ["KAN1-10A","KAN1-11","KAN1-15A","KAN1-20","KAN1-3"],
#"KAN4": ["KAN4-14A","KAN4-20A","KAN4-23A"],
#"KAN5": ["KAN5-1A","KAN5-1A","KAN5-2A","KAN5-3A","KAN5-4A","KAN5-5A"],
#"KB1": ["KB1-1","KB1-1","KB1-10","KB1-13","KB1-14","KB1-4"],
"KFU7": ["KFU7-10K","KFU7-11K","KFU7-12K","KFU7-16K","KFU7-21K","KFU7-25K","KFU7-2KA","KFU7-5A","KFU7-7KA","KFU7-8KA"],
"KGD3": ["KGD3-12A","KGD3-13A","KGD3-16A","KGD3-17A","KGD3-18A","KGD3-19A","KGD3-20A","KGD3-23A","KGD3-2A","KGD3-3A"],#"KGD3-5A"],
"KGD4": ["KGD4-10","KGD4-11A","KGD4-19","KGD4-1A","KGD4-24"], #"KGD4-13A","KGD4-14A","KGD4-23","KGD4-2A","KGD4-7A"
"KGD5": ["KGD5-6A","KGD5-7A"],
"KGU1": ["KGU1-10A","KGU1-11A","KGU1-12A","KGU1-13A","KGU1-1A","KGU1-20A","KGU1-22A","KGU1-3A","KGU1-6A","KGU1-7A","KGU1-9A"],
"KHN10": ["KHN10-10A","KHN10-13A","KHN10-15A","KHN10-17A","KHN10-18A","KHN10-1A","KHN10-22A","KHN10-23A","KHN10-24A"],#"KHN10-7A"],
"KHN11": ["KHN11-15A","KHN11-1A","KHN11-8A"],#"KHN11-6A",
"KHN5": ["KHN5-10","KHN5-11A","KHN5-12A","KHN5-13A","KHN5-15A","KHN5-16A","KHN5-17A","KHN5-1A","KHN5-22A","KHN5-24A"],
"KHN7": ["KHN7-10A","KHN7-11A","KHN7-16A","KHN7-17A","KHN7-19A","KHN7-22A","KHN7-2A","KHN7-5A","KHN7-6A","KHN7-7A"],
"KHN9": ["KHN9-10A","KHN9-12A","KHN9-13A","KHN9-14A","KHN9-17A","KHN9-20A","KHN9-21A","KHN9-22A","KHN9-24A","KHN9-7A"],
#"KHU3": ["KHU-16A","KHU3-11A","KHU3-11A","KHU3-12A","KHU3-13A","KHU3-14A","KHU3-15A","KHU3-17A","KHU3-19A","KHU3-1A","KHU3-2A"],
#"KHU2": ["KHU2-10A","KHU2-10A","KHU2-11A","KHU2-14A","KHU2-15A","KHU2-16A","KHU2-22A","KHU2-2A","KHU2-4A","KHU2-6A","KHU2-9A"],
#"KJI2": ["KJI2-11A","KJI2-11A","KJI2-13A","KJI2-14A","KJI2-15A","KJI2-16A","KJI2-18A","KJI2-24A","KJI2-25A","KJI2-6A","KJI2-7A"],
#"KJI5": ["KJI5-10A","KJI5-10A","KJI5-11A","KJI5-18A","KJI5-24A","KJI5-2A"],
#"KJIX2": ["KJIX2-12A","KJIX2-12A","KJIX2-15A","KJIX2-17A","KJIX2-1A","KJIX2-23A","KJIX2-24A","KJIX2-25A","KJIX2-26A","KJIX2-28A","KJIX2-3A"],
"KTW1": ["KTW1-11A","KTW1-14A","KTW1-15A","KTW1-1A","KTW1-22A","KTW1-23A","KTW1-3A","KTW1-4A","KTW1-7A","KTW1-9A"],
"KTW4": ["KTW4-12B","KTW4-13B","KTW4-15B","KTW4-17B","KTW4-18B","KTW4-19B","KTW4-20B","KTW4-21B","KTW4-22B","KTW4-24B"],
"KTW6": ["KTW6-12A","KTW6-15A","KTW6-17A","KTW6-18A","KTW6-1A","KTW6-23A","KTW6-24A","KTW6-2A","KTW6-5A","KTW6-7A"],
#"NI1": ["NI1-8K","NI2-14K"],
#"soybean":["soybean"] # If included, this creates a sample called soybean that is entirely missing data, and raxml quits.
}

In [8]:
# Extract one concatenated alignment across scaffold indices 0..16335 with
# consensus_reduce=True, i.e. the imap populations rather than the individual
# samples become the rows of the alignment (the stats below report 13 samples).
# NOTE(review): an earlier comment described minmap as "half" of each population,
# but the value actually used is 0 per population — confirm which was intended.
# NOTE(review): this rebinds T_denovo, replacing the individual-sample extracter
# created earlier in the notebook.
consensus_settings = dict(
    data=data,
    name="TW.SCh_denovo_CR",
    workdir=OUTDIR,
    scaffold_idxs=range(16336),
    mincov=2,
    imap=imap,
    minmap={pop: 0 for pop in imap},
    consensus_reduce=True,
)
T_denovo = ipa.window_extracter(**consensus_settings)

# write the alignment to file
T_denovo.run(force=True)

# display summary stats for the extracted alignment
T_denovo.stats

Wrote data to /rigel/dsi/users/slh2181/Pmontana/Analysis/Taiwan/TW.SCh_denovo_CR.phy


Unnamed: 0,scaffold,start,end,sites,snps,missing,samples
0,concatenated,0,3187245,3187245,27791,0.567,13


In [9]:
# Init the raxml object on the consensus alignment written by window_extracter.
# T, N and m are forwarded to the raxml command line as -T, -N and -m
# (see the printed command below). Results go to OUTDIR, the same directory
# as the alignment, instead of repeating the literal path.
rax = ipa.raxml(
    data=T_denovo.outfile,
    T=24,
    N=100,
    m="GTRGAMMA",
    name="TaiwanCh-denovo_CR",
    workdir=OUTDIR,
)

# print the raxml command string for posterity
print(rax.command)

# run the command (options: block until finished; overwrite existing)
rax.run(block=True, force=True)

/rigel/home/slh2181/miniconda3/bin/raxmlHPC-PTHREADS-AVX2 -f a -T 24 -m GTRGAMMA -n TaiwanCh-denovo_CR -w /rigel/dsi/users/slh2181/Pmontana/Analysis/Taiwan -s /rigel/dsi/users/slh2181/Pmontana/Analysis/Taiwan/TW.SCh_denovo_CR.phy -p 54321 -N 100 -x 12345
job TaiwanCh-denovo_CR finished successfully


In [19]:
# Load the bipartitions tree produced by the consensus RAxML run.
tre = toytree.tree(rax.trees.bipartitions)
# NOTE(review): the commented path below names a "TW.SCh_denovo3" file, which does
# not match this run's name ("TaiwanCh-denovo_CR") — update before uncommenting.
#tre = toytree.tree("/rigel/dsi/users/slh2181/Pmontana/Analysis/Taiwan/RAxML_bipartitions.TW.SCh_denovo3")

# Root on the outgroup populations; tips of this tree are population codes.
consensus_outgroup = ["KGD5", "KHN11", "KHN5", "KHN9", "KHN10"]
rtre = tre.root(consensus_outgroup)

# Ordered (population code, color) pairs. The first code that occurs as a
# substring of a tip label decides its color; tips matching no code
# (e.g. KHN11) fall back to "mediumvioletred".
tip_palette = [
    ("KTW1", "navy"),
    ("KTW4", "cornflowerblue"),
    ("KTW6", "deepskyblue"),
    ("KFU7", "brown"),
    ("KGD3", "darkorange"),
    ("KGD4", "gold"),
    ("KGD5", "sandybrown"),
    ("KGU1", "purple"),
    ("KHN5", "red"),
    ("KHN9", "deeppink"),
    ("KHN7", "magenta"),
    ("KHN10", "violet"),
]
colorlist = [
    next((shade for code, shade in tip_palette if code in tip), "mediumvioletred")
    for tip in rtre.get_tip_labels()
]

# Draw the rooted consensus tree with support values on the nodes and tips
# colored by population. Optional toggle left disabled: tip_labels_align=True.
node_label_css = {"font-size": "10px"}
node_marker_css = {"fill": "white", "stroke": "black"}
tip_label_css = {"font-size": "12px"}

rtre.draw(
    node_labels="support",
    node_labels_style=node_label_css,
    node_sizes=18,
    node_style=node_marker_css,
    tip_labels_colors=colorlist,
    tip_labels_style=tip_label_css,
)

(<toyplot.canvas.Canvas at 0x2aab0c8bc690>,
 <toyplot.coordinates.Cartesian at 0x2aab0c7d4bd0>,
 <toytree.Render.ToytreeMark at 0x2aab0c8d2790>)

In [20]:
# Newick string of the rooted population-level tree (the topology intended for BPP).
rtre.newick

'((KHN10:0.00339739,(KHN9:0.0032243,(KHN5:0.00308407,(KHN11:0.00298904,KGD5:0.00324515)100:0.00100339)100:0.000935809)78:0.000918366)100:0.00733655,((KTW4:0.00396346,(KTW6:0.00350032,KTW1:0.00377938)92:0.000977231)100:0.00146057,((KFU7:0.00466767,KGD4:0.00437057)80:0.000857203,(KHN7:0.00460131,(KGD3:0.00437608,KGU1:0.00400615)62:0.000845919)79:0.000692173)100:0.000965772)100:0.00733655);'