Adding 2 calibraitons to the 10 calibration analysis using information from Matt Friedman.

>Hi Mike,

>I've gone ahead and added two calibrations and re-calculated the rest based on fossils not used as calibrations in this paper but which provide additional constraints on divergence times.

>Let me know how this works out.

>Cheers,
M

Here are the calibrations:

```
Node,Taxon,Minimum,Published 95% CI (Benton et al. 2015),Published 95% CI (Friedman et al. 2014),New: Estimated 95% CI,"Near et al. (2012, 2013)",Benton et al. (2015),Hedman age sequence,,
222,Calatomus,11.9,n/a,n/a,43.95,No,No,"98, 98, 69.71, 55.20, 54.17, 49.0, 13.82, 11.9",,
194,Tarkus,49,n/a,n/a,53.93,No,No,"98, 98, 69.71, 55.20, 54.17, 54.17, 49, 49, 49",,
179,tholichthys' larva,29.62,n/a,n/a,59.26,Yes*,No,"98, 98, 69.71, 55.20, 54.17, 54.17, 29.62",,
150,Eastmanelepes,49,n/a,n.a,61.61,Yes*,No,"98, 98, 69.71, 55.20, 54.17, 49, 49",,
137,Mene ,55.2,n/a,n/a,95.64,Yes*,No,"98, 98, 69.71, 55.20",,
130,Eobuglossus Turahbuglossus,41.2,n/a,n/a,53.88,Yes*,No,"98, 98, 69.71, 55.20, 49, 49, 49, 41.2",,
103,Ramphexocoetus,49,130.8,100.89,80.52,Yes*,Yes,"98, 98, 69.71, 55.2, 49",,
80,Eocoelopoma,54.17,n/a,n/a,95.58,No,No,"98, 98, 69.71, 54.17",,
29,"Eoholocentrum, Berybolcensis, Tenuicentrum",49,n/a,n/a,109.29,Yes* ,No,"98, 98, 49",,
22,Aipichthys,98,158.3,136.03,128.82,No,Yes,"98, 98",,
11,Homonotichthys,93.9,n/a,n/a,128.04,Yes*,No,"98, 93.9",,
10,Mcconichthys,63.1,n/a,n/a,93.51,No,No,"98, 98, 93.9, 80.67, 63.1",,
```

In [1]:
import sys,os,os.path
sys.path.append(os.path.expanduser('~/Dropbox/tools'))

##see script in Dropbox/tools for these functions
from phy_utils import setTreeStyle, rankify, facify, getPamlPars, writeCTL, makeGroups, random_color, rgb2hex, hls2hex,mylayout, calibrations_layout, attachSupport, makeSupportSymbols, addPamlCalibration, makePamlTree, annotatePaml, annotateAndReturn, writePamlTree, copyPamlAlignment, writePamlCtl    
##functions for generating random colors
from ete2 import Tree, faces, TreeStyle, NodeStyle, CircleFace
import colorsys
from numpy import random
from __future__ import division
from collections import OrderedDict
import random

Using these tree directories: **/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/trees/75p/** and **/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/trees/75p/**


UPDATE: the tree analyses are finalized. There are (in 75% complete)

- ExaBayes_ConsensusExtendedMajorityRuleNewick.Acanthomorph-75p-STDPART-1.5M-Burn25-FINAL
- ExaBayes_ConsensusExtendedMajorityRuleNewick.Acanthomorph-75p-UNPART-1.5M-Burn25-FINAL.tre
- RAxML.acanthomorph-no-chauliodius-75p-STDPART.tre
- RAxML.acanthomorph-no-chauliodius-75p-UNPART.tre

and in 95% complete:

- ExaBayes_ConsensusExtendedMajorityRuleNewick.Acanthomorph-95p-UNPART-1M-Burn25-FINAL.tre
- RAxML.acanthomorph-no-chauliodius-95p-UNPART.tre

Will present the 75% complete tree in the paper

housekeeping

- set the home directory
- assign identifier to each node for naming purposes
- correct names on tree
- add families
- set the outgroup to Alepisaurus

Taxonomy has already been saved to a file: /Users/michael_alfaro/Dropbox/malfaro-acanthomorph/manuscript/pnas tex/ETE_work/converted_pngs/ranks.txt. Read this in to assign tips to families

In [2]:
#making the acanthomorph tree look good
import os, uuid
from ast import literal_eval
from ete2 import Tree, TreeStyle, AttrFace, NodeStyle

os.chdir("/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/manuscript/pnas tex/ETE_work")
home = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/manuscript/pnas tex/ETE_work/ete_acanthomorph_fig/"

exaPartTree = Tree("/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/trees/75p/ExaBayes_ConsensusExtendedMajorityRuleNewick.Acanthomorph-75p-STDPART-1.5M-Burn25-FINAL.tre")
exaNoPartTree = Tree("/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/trees/75p/ExaBayes_ConsensusExtendedMajorityRuleNewick.Acanthomorph-75p-UNPART-1.5M-Burn25-FINAL.tre")
raxPartTree = Tree("/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/trees/75p/RAxML.acanthomorph-no-chauliodius-75p-STDPART.tre")
raxNoPartTree = Tree("/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/trees/75p/RAxML.acanthomorph-no-chauliodius-75p-UNPART.tre")

fourTrees = {
"exa_75_part" : exaPartTree, 
"exa_75_no_part" : exaNoPartTree, 
"rax_75_part" : raxPartTree,  
"rax_75_no_part" : raxNoPartTree 
}

##need to root the exabayes trees, set root branch to 0, assign uuid
for name in fourTrees.keys():
    tt = fourTrees[name]
    ancestor = "alepisaurus_ferox"
    tt.set_outgroup(ancestor)
    tt.get_tree_root().dist = 0.001
    counter = 0
    for node in tt.traverse("postorder"):
        node.add_features( nodeid = str(uuid.uuid1() ) )
        node.add_features( nodenumber = str(counter) )# for calibrations
        node.add_features( order = None)
        counter += 1
        if node.is_leaf():
            node.name = node.name.capitalize().replace("_", " ")
            #print node.name
        if node.name == "Takifugu occelatus":
            node.name = "Takifugu ocellatus"
        if node.name == "Ostorhinchus nigrofasciatus":
            node.name = "Ostorhinchus nigrofasciatus"
        if node.name == "Sargocentron coruscum2":
            node.name = "Sargocentron coruscum"
            print "changed {}".format(node.name)


outfile = "4_27_2015_acanthomorh_renamed.tre"

tt = fourTrees["exa_75_part"]
ml = fourTrees["rax_75_part"] #just grabbing the ml tree for the bs values

os.chdir(home)
tt.write( features = ["nodeid", "nodenumber"], outfile = outfile)


rr = open("/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/manuscript/pnas tex/ETE_work/converted_pngs/ranks.txt").read()
fam_key = literal_eval(rr)
ranks = fam_key

attachSupport(tt, ml)
#print "fam keys has {} keys and {} values".format( len(fam_key.keys() ), len(fam_key.values()))

changed Sargocentron coruscum
changed Sargocentron coruscum
changed Sargocentron coruscum
changed Sargocentron coruscum


In [3]:
import glob
import os

#directory for images from Julie
png_dir = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/manuscript/pnas tex/ETE_work/converted_pngs"
BG_COLORS = {} #dictionary for holding colors



tips = tt.get_leaf_names() 
os.chdir(png_dir)
pic_names = glob.glob("*.png") #just get pngs
pic_genus = [gg.split("_")[0] for gg in pic_names]
tip_genus = [names.split(" ")[0] for names in tips]

#print "there are {} total illustrations".format(len(pic_names))
for fish in pic_names:
    name =  " ".join(fish.split("_")[0:2])
    intips = names in tips
    #print "{} in the tree: {}".format(name, str(intips))

#print tips

#set methods to find tip genera that have illustrations
inter = set(tip_genus).intersection(pic_genus)

###make a dict to assign names and imagefaces
from ete2 import Tree, faces, TreeStyle
face_dict ={}
species_dict = {}
for images in glob.glob("*.png"):
    genus = images.split("_")[0]
    species = images.split("_")[1]
    nn = " ".join([genus, species])
    #print nn
    newface = faces.ImgFace(png_dir + "/" + images, width = 250)
    #newface = faces.ImgFace(png_dir + "/" + images)
    face_dict[nn] = newface
#len(face_dict.keys())

#print "there are {} images in face_dict".format(len(face_dict.keys()))

###Here I define some colors and palettes for clades of interest
randomcolors = False #flag for selecting random colors or a palette

scarus_palette_complete = ["#ffffff",  "#91c9dd",  "#8ec3da",  "#e4e1da",  "#f1f0ea",  "#d29f7e",  "#db9ab0",  "#efd5db",  "#4f93b9",  "#f0ede6",  "#ede9e7",  "#ec926b",  "#8c7da5",  "#e96e7a",  "#ceaa9f",  "#cc778d",  "#ebe5df",  "#a65d72",  "#b98698",  "#99bbd3",  "#99c5d9",  "#bfd9e2",  "#fef4ee",  "#f0e6dc",  "#ebf3ef",  "#ecebec",  "#f8f4eb",  "#fff6ed"]
scarus_palette_light = ["#d3aeb9", "#c6bed2", "#a7c9dc", "#e6bbc6", "#f6c9b5", "#edcdd8", "#ccdde9"]

clades = {} #node id and taxa of all taxonomic groups 
clades["Paracanthopterygii"] = makeGroups(tt, ("Lampris guttatus", "Gadus morhua"), "#B7E1DC" )
clades["Tetraodontiformes"] = makeGroups(tt, ("Canthigaster rostrata", "Balistes capriscus"), "#E2D2C2" )
#clades["lophiiforms"] = makeGroups(tt, ("Ogcocephalus radiatus", "Antigonia capros") )
clades["Scombrimorpha"] = makeGroups(tt, ("Syngnathus fuscus", "Cubiceps baxteri"), "#DDDCD5" )
clades["Gobiomorpha"] = makeGroups(tt, ("Kurtus gulliveri", "Valenciennea strigata"), "#E2D7D4" )
clades["Beryciformes"] = makeGroups(tt, ("Rondeletia loricata", "Myripristis leiognathus"), "#E6A8A4" )
clades["Ovalentaria"] = makeGroups(tt, ("Xenentodon cancila", "Scartella cristata"), "#D6AC83")
clades["Labridae"] = makeGroups( tt, ("Epibulus insidiator", "Halichoeres poeyi"), "#EEB596" )
clades["Acanthomorpha"] = makeGroups(tt, ("Lampris guttatus", "Canthigaster rostrata"), "#F3E1CD"  )
clades["Syngnathiformes"] = makeGroups( tt, ("Parupeneus multifasciatus", "Syngnathus fuscus"), "#D5C889" )
clades["Scombriformes"]  = makeGroups ( tt, ("Chiasmodon niger", "Cubiceps baxteri") )
#clades["carangids"]  = makeGroups ( tt, ("Alepes kleinii", "Toxotes jaculatrix"), "#EDD6AE" )
clades["Carangimorpha"] = makeGroups( tt, ("Alepes kleinii", "Centropomus medius"), "#F6E0C0")
clades["Pleuronectiformes"] = makeGroups( tt, ("Parachirus xenicus", "Citharoides macrolepis"), "#EBC8AC" )
clades["Percomorpha"] = makeGroups( tt, ("Carapus bermudensis", "Citharoides macrolepis"), "#D8D2C9")
clades["Perciformes"] = makeGroups ( tt, ("Cephalopholis argus", "Taenianotus triacanthus"), "#DECAC4" )
clades["Eupercomorpha"] = makeGroups(tt, ("Canthigaster rostrata", "Haemulon album"), "#D9DED5") 


##add the taxonomic group to the parent node
for clade in clades.keys():
    nid = clades[clade][0]
    res = tt.search_nodes(nodeid = nid)
    res[0].add_features( order = clade)
    #print res[0]


if randomcolors:
    possibleColors = range(360, 0, -int( 360./ len(clades.keys()) ) ) #get a set of color values based on the number of clades
    for clade in clades.keys():
        col = possibleColors.pop()
        color = random_color(h=float(col)/360, s=0.3, l=0.9)
        nodeid = clades[clade][0]
        BG_COLORS[nodeid] = color
    
else:
    for clade in clades.keys():
        nodeid = clades[clade][0]
        if clades[clade][2]:
            #if a user defined color is present
            color = clades[clade][2]
            #print "using user defined color {}".format(color)
        else:
            color = str( random.choice(scarus_palette_light) )
            #print "using random color from palette: {}".format(color)
            
        BG_COLORS[nodeid] = color

##try coloring nodes from BG_Color here
for node in tt.traverse():
    if node.nodeid in BG_COLORS:
        #print "this node is in colors" + node.nodeid
        node.img_style["bgcolor"] = BG_COLORS[str(node.nodeid)]

In [6]:
import time
import datetime

fig_dir = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/manuscript/pnas tex/figures/"

tt.ladderize()
rankify(tt, ranks)
facify(tt, face_dict)

tt.render("circle_2a.pdf", tree_style=setTreeStyle("circle", mylayout), w=1200)
tt.render("rect_3a.pdf", tree_style=setTreeStyle("rect", mylayout), w=1200)
tree_name = "bayes_75per_comb_right.pdf"
file_name = fig_dir + tree_name

tt.render(file_name, tree_style=setTreeStyle("rect", mylayout), w=1200)

###write calibration map for full tree
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
writedir = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/"
tt.render(writedir +"{}_labelled_nodes_acanthomorphs.pdf".format(st),  tree_style=setTreeStyle("rect", mylayout), w=1200)




Alepisaurus ferox
Alepisauridae
<class 'ete2.coretype.tree.TreeNode'>

Ceratoscopelus warmingii
Myctophidae
<class 'ete2.coretype.tree.TreeNode'>





Anoplogaster cornuta
Anoplogastridae
<class 'ete2.coretype.tree.TreeNode'>

Lampris guttatus
Lamprididae
<class 'ete2.coretype.tree.TreeNode'>





Zu elongatus
Trachipteridae
<class 'ete2.coretype.tree.TreeNode'>
Regalecus glesne
Regalecidae
<class 'ete2.coretype.tree.TreeNode'>
Polymixia lowei
Polymixiidae
<class 'ete2.coretype.tree.TreeNode'>



Rondeletia loricata
Rondeletiidae
<class 'ete2.coretype.tree.TreeNode'>



Percopsis omiscomycus
Percopsidae
<class 'ete2.coretype.tree.TreeNode'>
Aphredoderus sayanus
Aphredoderidae
<class 'ete2.coretype.tree.TreeNode'>
Zeus faber
Zeidae
<class 'ete2.coretype.tree.TreeNode'>
Zenopsis conchifera
Zeidae
<class 'ete2.coretype.tree.TreeNode'>
Stylephorus chordatus
Stylephoridae
<class 'ete2.coretype.tree.TreeNode'>

Sargocentron coruscum

Carapus bermudensis
Carapidae
<class 'ete2.coretype.tree.

{'faces': [[557.4539363484083,
   8147.738693467332,
   672.0268006700163,
   8161.809045226126,
   170,
   'Hypoplectrus puela'],
  [557.4539363484083,
   8161.809045226126,
   596.9849246231151,
   8169.849246231151,
   170,
   'Serranidae'],
  [557.4539363484083,
   8169.849246231151,
   596.9849246231151,
   8177.889447236176,
   170,
   'Serranidae'],
  [672.0268006700163,
   8048.241206030146,
   839.5309882747064,
   8162.814070351754,
   170,
   None],
  [672.0268006700163,
   8162.814070351754,
   839.5309882747064,
   8277.386934673363,
   170,
   None],
  [374.9222301986119,
   707.87269681742,
   512.9456807848765,
   721.943048576214,
   16,
   'Aphredoderus sayanus'],
  [374.9222301986119,
   721.943048576214,
   431.8736539842065,
   729.9832495812391,
   16,
   'Aphredoderidae'],
  [374.9222301986119,
   729.9832495812391,
   431.8736539842065,
   738.0234505862642,
   16,
   'Aphredoderidae'],
  [512.9456807848765,
   615.0753768844218,
   680.4498683895665,
   722.948

###writing files for mcmctree analysis

This block will print out multiple calibration schemes but am only interested in scheme 3 that has lower and upper bounds on all nodes we used.

    
- for each alignment (partitioned or unpartitioned)
    - for each scheme
        - make a directory with 
            - alignment
            - treefile
            - control file
        - modify the control file
            - hessian or post-hessian
            - outfile name
            - treefile name
            - use data
            - number of genes
            
making run files for paml analysis using 95% complete matrix AND

- the 12 calibrations that Friedman sent
- also want to print a figure with this scheme labelled

In [8]:
import copy
import pandas as pd
import re as re
###deepcopy the tree so that I can preserve the pruned and unpruned versions

        

        

pruned_tree = copy.deepcopy(tt)
#first prune out the outgroups
tips = pruned_tree.get_leaf_names()
if "Alepisaurus ferox" in tips or "Ceratoscopelus warmingii" in tips:
    tips.remove("Alepisaurus ferox")
    tips.remove("Ceratoscopelus warmingii")
    pruned_tree.prune(tips)

print "unpruned tree has {} tips and pruned tree has {} tips".format(len(tt.get_leaf_names()), len(pruned_tree.get_leaf_names()) )

#next read in the calibrations from Friedmans data
infile = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/12_calibration_bounds.csv"
writedir = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/"

scheme1, scheme2, scheme3 = {}, {}, {}

dd = pd.read_csv(infile, index_col=0, na_values="n/a") #the nodes are the index in this dataframe
pd.set_option('float_format', '{:20,.2f}'.format)

#populate the scheme dictionaries
#Matt gave alternative calibratiosn for these nodes from different sources. For the paper I am using lower and upper bounds.
for cal_node in dd.index:
    min_age = dd.loc[cal_node][1] 
    max_age_1 = dd.loc[cal_node][2]
    max_age_2 = dd.loc[cal_node][3]
    max_age_3 = dd.loc[cal_node][4]
    
    scheme1[cal_node] = [min_age, max_age_1]
    scheme2[cal_node] = [min_age, max_age_2]
    scheme3[cal_node] = [min_age, max_age_3]

unpruned tree has 120 tips and pruned tree has 118 tips


###for the root age I am using this prior from Matt
>Hi Mike,

>We'll, if we stick to the same procedure as the other nodes, and consider the information from outgroups . . . we can provide an emprically informed 95% prior of 143.0 Ma, with a hard minimum age of 98 Ma.

In [10]:
import os
import glob
import shutil

###write out PAML for for analysis with Brant's cluster

###running now to only look at fastest evolving three partitions
alignpath = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/alignments/" #directory with alighments
outpath = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/PAML_run_files/4_partitions" #where run files will be created
workingdir = writedir #working in this folder

heshlist = ["hessian", "post-hessian"]
nruns = 10 #number of runs for each analysis using the approximation

##for making the control file (parsi_mcmc.ctl is parameter values I updated after experimenting and emailing Ziheng in June)
ctlfile = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/control_file_template/parsi_mcmc.ctl"
###should move this elsewhere--only need to do it once to get the paml ctl structure
with open(ctlfile, 'r') as ff:
    ctl = ff.read()
    ff.close()

###overwrites files eachtime this is called
if not os.path.exists(outpath):
    os.makedirs(outpath)
else:
    shutil.rmtree(outpath)           #removes all the subdirectories!
    os.makedirs(outpath)
    
###In June 2015 Brant sent me a PAML formatted alignment with 4 partitions based on the numer of parsimony informative sites at each site





#sate-gblocks-clean-min-114-taxa-missing-no-outgroups-CLUSTERED-raxml.PAML.phylip.parsi.seq

#aligns = glob.glob(alignpath + "*parsi.seq")

#I am also going to try removing the biggest slowest partiton to see if that changes anything
#aligns = glob.glob(alignpath + "3_part*.*")
aligns = glob.glob(alignpath + "4_part*.*")
basectl = getPamlPars(ctl) #suck in base parameters for ctl file

#alignments with parsimony clusters end in sate-gblocks-clean-min-114-taxa-missing-no-outgroups-CLUSTERED-raxml.PAML.phylip.parsi.seq
basectl['RootAge'] = "'B(9.8, 14.3, 1e-300, 0.05)'" #from Matt's email, above
basectl['print'] = '1000'
basectl['alpha'] = 0.1
basectl['ncatG'] = 5
basectl['BDparas'] = '1 1 0'

#schemes = [scheme1, scheme2, scheme3]

###only going to look at scheme 3 for this analysis
schemes = [scheme3]
tree_title = "12_cali_no_outgroups_scheme_{}.tre".format(3)
numtaxa = len(pruned_tree.get_leaf_names())
numtrees = 1 # change this if ever considering multiple topologies
paml_tree_string = annotatePaml(pruned_tree, scheme3, raw = False)

pruned_paml_tree = writePamlTree(paml_tree_string, tree_title, numtaxa, numtrees, return_tree = True )
#write the calibration map for the pruned tree
pruned_tree.render(writedir +"no_out_scheme_2_labelled_pamlCalibrations_acanthomorphs_{}.pdf".format(tree_title), tree_style=setTreeStyle("rect", calibrations_layout), w=1200)   



 in annotatePaml
node is 194 lower is 49.0 upper is 53.93	
calibration text is 'B(4.9,5.393,1e-300,0.05)'
targ cali node is 194 and text is 'B(4.9,5.393,1e-300,0.05)'


 in annotatePaml
node is 22 lower is 98.0 upper is 128.82	
calibration text is 'B(9.8,12.882,1e-300,0.05)'
targ cali node is 22 and text is 'B(9.8,12.882,1e-300,0.05)'


 in annotatePaml
node is 103 lower is 49.0 upper is 80.52	
calibration text is 'B(4.9,8.052,1e-300,0.05)'
targ cali node is 103 and text is 'B(4.9,8.052,1e-300,0.05)'


 in annotatePaml
node is 137 lower is 55.2 upper is 95.64	
calibration text is 'B(5.52,9.564,1e-300,0.05)'
targ cali node is 137 and text is 'B(5.52,9.564,1e-300,0.05)'


 in annotatePaml
node is 10 lower is 63.1 upper is 93.51	
calibration text is 'B(6.31,9.351,1e-300,0.05)'
targ cali node is 10 and text is 'B(6.31,9.351,1e-300,0.05)'


 in annotatePaml
node is 11 lower is 93.9 upper is 128.04	
calibration text is 'B(9.39,12.804,1e-300,0.05)'
targ cali node is 11 and text is 'B(9.39,12

{'faces': [[380.19277108433727,
   10875.32530120482,
   546.8433734939758,
   10894.554216867471,
   164,
   'Cephalopholis argus'],
  [380.19277108433727,
   10894.554216867471,
   434.21686746987945,
   10905.542168674701,
   164,
   'Serranidae'],
  [380.19277108433727,
   10905.542168674701,
   434.21686746987945,
   10916.53012048193,
   164,
   'Serranidae'],
  [750.7951807228917,
   16608.28915662651,
   922.0240963855423,
   16627.51807228916,
   233,
   'Canthigaster rostrata'],
  [750.7951807228917,
   16627.51807228916,
   823.1325301204821,
   16638.50602409639,
   233,
   'Tetraodontidae'],
  [750.7951807228917,
   16638.50602409639,
   823.1325301204821,
   16649.493975903617,
   233,
   'Tetraodontidae'],
  [454.3132530120481,
   11850.963855421689,
   654.8433734939758,
   11870.19277108434,
   174,
   'Pempheris schomburgkii'],
  [454.3132530120481,
   11870.19277108434,
   519.3253012048192,
   11881.180722891568,
   174,
   'Pempheridae'],
  [454.3132530120481,
   1

In [11]:
%qtconsole
basectl['ndata'] = "4" #set number of partitions
#basectl['RootAge'] = "''" 
for alignment in aligns:
    #basectl['seqfile'] = align #set alignment name in ctl file
    #aligndir = outpath + os.path.basename(alignment).split("_")[0] + "_part/"
    for num, scheme in enumerate(schemes):
        #tree_title = "parsi_scheme_{}.tre".format(num)
        tree_title = "12_cali_no_outgroups_scheme_{}.tre".format(3) #only looking at scheme 3

        for hes in heshlist:
            if hes == "post-hessian":
                basectl['burnin'] = '5000'
                basectl['samplefreq'] = '100'
                basectl['nsample'] = '10000'
                for run in range(nruns):
                    #create a directory for each replicate of the run
                    rep_dir_text = "scheme-{}-HE-{}-run{}".format(num, hes, run+1)
                    rep_path = outpath + "/" + rep_dir_text
                    os.makedirs(rep_path)
                    os.chdir(rep_path)
                    paml_tree_string = annotatePaml(pruned_tree, scheme)
                    writePamlTree(paml_tree_string, tree_title, numtaxa, numtrees )
                    copyPamlAlignment(alignment, rep_path)
                    #writePamlCtl(hes, tree_title, alignment, basectl)
                    writeCTL(hes, tree_title, alignment, basectl, run+1, )
                    
                    """
                    #schemenum = str(num)
                    curdir = aligndir + "scheme_{}_{}_run_{}".format(num, hes, run+1)
                    os.makedirs(curdir)
                    os.chdir(curdir)
                    paml_tree_string = annotatePaml(pruned_tree, scheme)
                    
                    writePamlTree(paml_tree_string, tree_title )
                    copyPamlAlignment(alignment, curdir)
                    #writePamlCtl(hes, tree_title, alignment, basectl)
                    writeCTL(hes, tree_title, alignment, basectl, run+1, )
                    """
            else:
                basectl['burnin'] = '3000'
                basectl['samplefreq'] = '100'
                basectl['nsample'] = '7500'
                #schemenum = str(num)
                rep_dir_text = "scheme-{}-HE-{}".format(num, hes)
                rep_path = outpath + "/" + rep_dir_text
                os.makedirs(rep_path)
                os.chdir(rep_path)
                paml_tree_string = annotatePaml(pruned_tree, scheme)
                writePamlTree(paml_tree_string, tree_title, numtaxa, numtrees )
                copyPamlAlignment(alignment, rep_path)
                    #writePamlCtl(hes, tree_title, alignment, basectl)
                writeCTL(hes, tree_title, alignment, basectl )
                
                
                """
                curdir = aligndir + "scheme_{}_{}".format(num, hes)
                os.makedirs(curdir)
                os.chdir(curdir)
                #tree_title = "scheme_{}.tre".format(num)
                writePamlTree(paml_tree_string, tree_title )
                copyPamlAlignment(alignment, curdir)
                #writePamlCtl(hes, tree_title, alignment, basectl)
                writeCTL(hes, tree_title, alignment, basectl)
                """



 in annotatePaml
node is 194 lower is 49.0 upper is 53.93	
calibration text is 'B(4.9,5.393,1e-300,0.05)'
targ cali node is 194 and text is 'B(4.9,5.393,1e-300,0.05)'


 in annotatePaml
node is 22 lower is 98.0 upper is 128.82	
calibration text is 'B(9.8,12.882,1e-300,0.05)'
targ cali node is 22 and text is 'B(9.8,12.882,1e-300,0.05)'


 in annotatePaml
node is 103 lower is 49.0 upper is 80.52	
calibration text is 'B(4.9,8.052,1e-300,0.05)'
targ cali node is 103 and text is 'B(4.9,8.052,1e-300,0.05)'


 in annotatePaml
node is 137 lower is 55.2 upper is 95.64	
calibration text is 'B(5.52,9.564,1e-300,0.05)'
targ cali node is 137 and text is 'B(5.52,9.564,1e-300,0.05)'


 in annotatePaml
node is 10 lower is 63.1 upper is 93.51	
calibration text is 'B(6.31,9.351,1e-300,0.05)'
targ cali node is 10 and text is 'B(6.31,9.351,1e-300,0.05)'


 in annotatePaml
node is 11 lower is 93.9 upper is 128.04	
calibration text is 'B(9.39,12.804,1e-300,0.05)'
targ cali node is 11 and text is 'B(9.39,12

###Will also run the 3 partiton version of this to be done with it all

In [12]:
import os
import glob
import shutil

###write out PAML for for analysis with Brant's cluster

###running now to only look at fastest evolving three partitions
alignpath = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/alignments/" #directory with alighments
outpath = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/PAML_run_files/3_partitions" #where run files will be created
workingdir = writedir #working in this folder

heshlist = ["hessian", "post-hessian"]
nruns = 10 #number of runs for each analysis using the approximation

##for making the control file (parsi_mcmc.ctl is parameter values I updated after experimenting and emailing Ziheng in June)
ctlfile = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/control_file_template/parsi_mcmc.ctl"
###should move this elsewhere--only need to do it once to get the paml ctl structure
with open(ctlfile, 'r') as ff:
    ctl = ff.read()
    ff.close()

###overwrites files eachtime this is called
if not os.path.exists(outpath):
    os.makedirs(outpath)
else:
    shutil.rmtree(outpath)           #removes all the subdirectories!
    os.makedirs(outpath)
    
###In June 2015 Brant sent me a PAML formatted alignment with 4 partitions based on the numer of parsimony informative sites at each site





#sate-gblocks-clean-min-114-taxa-missing-no-outgroups-CLUSTERED-raxml.PAML.phylip.parsi.seq

#aligns = glob.glob(alignpath + "*parsi.seq")

#I am also going to try removing the biggest slowest partiton to see if that changes anything
#aligns = glob.glob(alignpath + "3_part*.*")
aligns = glob.glob(alignpath + "3_part*.*")
basectl = getPamlPars(ctl) #suck in base parameters for ctl file

#alignments with parsimony clusters end in sate-gblocks-clean-min-114-taxa-missing-no-outgroups-CLUSTERED-raxml.PAML.phylip.parsi.seq
basectl['RootAge'] = "'B(9.8, 14.3, 1e-300, 0.05)'" #from Matt's email, above
basectl['print'] = '1000'
basectl['alpha'] = 0.1
basectl['ncatG'] = 5
basectl['BDparas'] = '1 1 0'

#schemes = [scheme1, scheme2, scheme3]

###only going to look at scheme 3 for this analysis
schemes = [scheme3]
tree_title = "12_cali_no_outgroups_scheme_{}.tre".format(3)
numtaxa = len(pruned_tree.get_leaf_names())
numtrees = 1 # change this if ever considering multiple topologies
paml_tree_string = annotatePaml(pruned_tree, scheme3, raw = False)

pruned_paml_tree = writePamlTree(paml_tree_string, tree_title, numtaxa, numtrees, return_tree = True )

basectl['ndata'] = "3" #set number of partitions
#basectl['RootAge'] = "''" 
for alignment in aligns:
    #basectl['seqfile'] = align #set alignment name in ctl file
    #aligndir = outpath + os.path.basename(alignment).split("_")[0] + "_part/"
    for num, scheme in enumerate(schemes):
        #tree_title = "parsi_scheme_{}.tre".format(num)
        tree_title = "12_cali_no_outgroups_scheme_{}.tre".format(3) #only looking at scheme 3

        for hes in heshlist:
            if hes == "post-hessian":
                basectl['burnin'] = '5000'
                basectl['samplefreq'] = '100'
                basectl['nsample'] = '10000'
                for run in range(nruns):
                    #create a directory for each replicate of the run
                    rep_dir_text = "scheme-{}-HE-{}-run{}".format(num, hes, run+1)
                    rep_path = outpath + "/" + rep_dir_text
                    os.makedirs(rep_path)
                    os.chdir(rep_path)
                    paml_tree_string = annotatePaml(pruned_tree, scheme)
                    writePamlTree(paml_tree_string, tree_title, numtaxa, numtrees )
                    copyPamlAlignment(alignment, rep_path)
                    #writePamlCtl(hes, tree_title, alignment, basectl)
                    writeCTL(hes, tree_title, alignment, basectl, run+1, )
                    
                    """
                    #schemenum = str(num)
                    curdir = aligndir + "scheme_{}_{}_run_{}".format(num, hes, run+1)
                    os.makedirs(curdir)
                    os.chdir(curdir)
                    paml_tree_string = annotatePaml(pruned_tree, scheme)
                    
                    writePamlTree(paml_tree_string, tree_title )
                    copyPamlAlignment(alignment, curdir)
                    #writePamlCtl(hes, tree_title, alignment, basectl)
                    writeCTL(hes, tree_title, alignment, basectl, run+1, )
                    """
            else:
                basectl['burnin'] = '3000'
                basectl['samplefreq'] = '100'
                basectl['nsample'] = '7500'
                #schemenum = str(num)
                rep_dir_text = "scheme-{}-HE-{}".format(num, hes)
                rep_path = outpath + "/" + rep_dir_text
                os.makedirs(rep_path)
                os.chdir(rep_path)
                paml_tree_string = annotatePaml(pruned_tree, scheme)
                writePamlTree(paml_tree_string, tree_title, numtaxa, numtrees )
                copyPamlAlignment(alignment, rep_path)
                    #writePamlCtl(hes, tree_title, alignment, basectl)
                writeCTL(hes, tree_title, alignment, basectl )
                
                
                """
                curdir = aligndir + "scheme_{}_{}".format(num, hes)
                os.makedirs(curdir)
                os.chdir(curdir)
                #tree_title = "scheme_{}.tre".format(num)
                writePamlTree(paml_tree_string, tree_title )
                copyPamlAlignment(alignment, curdir)
                #writePamlCtl(hes, tree_title, alignment, basectl)
                writeCTL(hes, tree_title, alignment, basectl)
                """



 in annotatePaml
node is 194 lower is 49.0 upper is 53.93	
calibration text is 'B(4.9,5.393,1e-300,0.05)'
targ cali node is 194 and text is 'B(4.9,5.393,1e-300,0.05)'


 in annotatePaml
node is 22 lower is 98.0 upper is 128.82	
calibration text is 'B(9.8,12.882,1e-300,0.05)'
targ cali node is 22 and text is 'B(9.8,12.882,1e-300,0.05)'


 in annotatePaml
node is 103 lower is 49.0 upper is 80.52	
calibration text is 'B(4.9,8.052,1e-300,0.05)'
targ cali node is 103 and text is 'B(4.9,8.052,1e-300,0.05)'


 in annotatePaml
node is 137 lower is 55.2 upper is 95.64	
calibration text is 'B(5.52,9.564,1e-300,0.05)'
targ cali node is 137 and text is 'B(5.52,9.564,1e-300,0.05)'


 in annotatePaml
node is 10 lower is 63.1 upper is 93.51	
calibration text is 'B(6.31,9.351,1e-300,0.05)'
targ cali node is 10 and text is 'B(6.31,9.351,1e-300,0.05)'


 in annotatePaml
node is 11 lower is 93.9 upper is 128.04	
calibration text is 'B(9.39,12.804,1e-300,0.05)'
targ cali node is 11 and text is 'B(9.39,12

move and rename out.BV to in.BV and creat runlines

In [13]:
%qtconsole


In [18]:
import shutil, glob
#generates a line of code to execute on Brant's clusters

workdir = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/PAML_run_files/"
rundirs = [os.path.join(workdir,o) for o in os.listdir(workdir) if os.path.isdir(os.path.join(workdir,o))]

for rdir in rundirs:
    pamldirs = [os.path.join(rdir,o) for o in os.listdir(rdir) if os.path.isdir(os.path.join(rdir,o))]
    #bv_path = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/pars-10-cali-out.BV"
    #bv_path = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_10_calibrations/PAML_run_files/3_partitions/scheme-0-HE-hessian/out.BV"
    bv_path = rdir + "/scheme-0-HE-hessian/out.BV"
    for pp in pamldirs:
        shutil.copy2(bv_path, pp + "/in.BV")
        #os.remove(pp + "/out.BV")
        print pp
        runline = "mcmctree {}".format(os.path.basename(glob.glob(pp + "/*.ctl")[0]))
        os.chdir(pp)
        with open("run_command.txt", "w") as ff:
            ff.write(runline)
        ff.close()
    #%qtconsole
    
"""
parent_d = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_10_calibrations/PAML_run_files/3_partitions/"
pamldirs = [os.path.join(parent_d,o) for o in os.listdir(parent_d) if os.path.isdir(os.path.join(parent_d,o))]
#bv_path = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/pars-10-cali-out.BV"
bv_path = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_10_calibrations/PAML_run_files/3_partitions/scheme-0-HE-hessian/out.BV"
for pp in pamldirs:
    shutil.copy2(bv_path, pp + "/in.BV")
    #os.remove(pp + "/out.BV")
    print pp
    runline = "mcmctree {}".format(os.path.basename(glob.glob(pp + "/*.ctl")[0]))
    os.chdir(pp)
    with open("run_command.txt", "w") as ff:
        ff.write(runline)
    ff.close()
#%qtconsole
"""

/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/PAML_run_files/3_partitions/scheme-0-HE-hessian
/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/PAML_run_files/3_partitions/scheme-0-HE-post-hessian-run1
/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/PAML_run_files/3_partitions/scheme-0-HE-post-hessian-run10
/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/PAML_run_files/3_partitions/scheme-0-HE-post-hessian-run2
/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/PAML_run_files/3_partitions/scheme-0-HE-post-hessian-run3
/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/PAML_run_files/3_partitions/scheme-0-HE-post-hessian-run4
/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_12_calibrations/PAML_run_files/3_partitions/scheme-0-HE-post-hessian-run5
/Users/michael_alfaro/

'\nparent_d = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_10_calibrations/PAML_run_files/3_partitions/"\npamldirs = [os.path.join(parent_d,o) for o in os.listdir(parent_d) if os.path.isdir(os.path.join(parent_d,o))]\n#bv_path = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/pars-10-cali-out.BV"\nbv_path = "/Users/michael_alfaro/Dropbox/malfaro-acanthomorph/dating/Friedman_10_calibrations/PAML_run_files/3_partitions/scheme-0-HE-hessian/out.BV"\nfor pp in pamldirs:\n    shutil.copy2(bv_path, pp + "/in.BV")\n    #os.remove(pp + "/out.BV")\n    print pp\n    runline = "mcmctree {}".format(os.path.basename(glob.glob(pp + "/*.ctl")[0]))\n    os.chdir(pp)\n    with open("run_command.txt", "w") as ff:\n        ff.write(runline)\n    ff.close()\n#%qtconsole\n'