@@ -9,7 +9,6 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
logger .info ("Bio++ Site Analysis" )
logger .info ("Models to be run: {:s}" .format (", " .join (model for model in lModels )))
logger .info ("Bppml parameter file: {:s}" .format (bppFile ))
logger .info ("Bppmixedlikelihood parameter file: {:s}" .format (bppMixed ))
nodes = PSPFunc .nbNode (treeFile , logger )
## Bppml
@@ -19,7 +18,7 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
INPUTFILE - alignement file
FORMAT - format of the aln file (here, phyx)
TREEFILE - tree file for the analyzed aln
MODEL - choose which model you want run on the data YNGP_M0 through 8, same models as PAML
MODEL - choose which model you want run on the data YNGP_M0 through 8, same models as PAML, and DFP07 models
NODES - number of nodes in the tree file
IGNORE - parameters to ignore for optimization, for example if one is fixated (ex: omegas in M8a)
OUTTREE - name of the optimized output tree
@@ -36,52 +35,48 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
dModelTrees = {model :outFileName + "_" + model + ".dnd" for model in lModels }
dModelParams = {model :outFileName + "_" + model + ".params" for model in lModels }
dModelLog = {model :outFileName + "_optimization_" + model for model in lModels }
dModelSyntax = {model :["YNGP_" + model ,"frequencies=F3X4(initFreqs=observed)" ] for model in lModels } # dictionary model number - [MODEL name, MODEL arguments for bppml]
dModelSyntax = {model :["YNGP_" + model ,"frequencies=F3X4(initFreqs=observed)" ] for model in lModels if model [0 ]== "M" } # dictionary model number - [MODEL name, MODEL arguments for bppml]
dModelSyntax .update ({model :[model [:5 ],"protmodel=JTT92" , "frequencies=F3X4(initFreqs=observed)" , "p0=1" ] for model in lModels if model [:5 ]== "DFP07" })
dLogLlh = {} # dictionary(model:logllh)
for model in lModels :
# take into account the specificities of each model (number of classes n for example)
if model == "M7" or model == "M8" :
dModelSyntax [model ].append ("n=4" )
if len (model ) > 2 :
if model [ 0 ] == "M" and len (model ) > 2 :
dModelSyntax [model ][0 ] = dModelSyntax [model ][0 ][:- 1 ]
dModelSyntax [model ].append ("omegas=1" )
# if M0 optimization in models, use tree optimized in M0 for subsequent model optimizations
lignore = []
if model != "M0" and "M0" in lModels :
treeFile = dModelTrees ["M0" ]+ "_1"
lignore .append ("BrLen" )
if model == "M8a" :
lignore .append ("YNGP_M8.omegas*" )
# do not re-optimize root & equilibrium if done before
if lModels .index (model )> 0 :
lignore .append ("Ancient" )
for i in range (1 ,4 ):
if model == "M0" :
lignore .append ("YN98.%d_Full.theta*" )
else :
lignore .append ("YNGP_%s.%d_Full.theta*" % (model ,i ))
ignore = "," .join (lignore )
# Use previous backup file (in order M0->M1->M2->M7->M8) to accelerate optimization
# dictionary of equivalences of specific parameter names between models
dequiv = {}
## omega from M0->M1->M2->M7
## omega from M0->M1->M2->M7 & M0->DFP07
dequiv ["omega" ] = {"M1" :{"YNGP_M1.omega" :"omega" },
"M2" :{"YNGP_M2.omega0" :"omega" },
"M0" :{"YN98.omega" :"omega" },
"M7" :{"YNGP_M7.p" :"[omega/(1-omega),1][omega==1]" ,"YNGP_M7.q" :"1" },
"M8" :{"YNGP_M8.p" :"[omega/(1-omega),1][omega==1]" ,"YNGP_M8.q" :"1" }}
"M2" :{"YNGP_M2.omega0" :"omega" },
"M0" :{"YN98.omega" :"omega" },
"M7" :{"YNGP_M7.p" :"[omega/(1-omega),1][omega==1]" ,"YNGP_M7.q" :"1" },
"M8" :{"YNGP_M8.p" :"[omega/(1-omega),1][omega==1]" ,"YNGP_M8.q" :"1" },
"DFP07_0" :{"DFP07.omega" :"omega" },
"DFP07" :{"DFP07.omega" :"omega" }}
dnewpar = {}
if not os .path .exists (dModelLog [model ]):
for prevmodel in ["M7" ,"M2" ,"M1" ,"M0" ]:
if not prevmodel in lModels :
continue
if os .path .exists (dModelLog [prevmodel ]+ ".def" ):
prevmodel = ""
if model [0 ]== "M" :
for prevmodel in ["M7" ,"M2" ,"M1" ,"M0" ]:
if not prevmodel in lModels or not os .path .exists (dModelLog [prevmodel ]+ ".def" ):
prevmodel = ""
else :
break
elif model [:5 ]== "DFP07" :
for prevmodel in ["DFP07_0" ,"M0" ]:
if not prevmodel in lModels or not os .path .exists (dModelLog [prevmodel ]+ ".def" ):
prevmodel = ""
else :
break
if prevmodel != "" :
logger .info ("Optimization for model " + model + " uses optimized parameters from model " + prevmodel )
fprev = open (dModelLog [prevmodel ]+ ".def" ,"r" )
lprev = list (fprev .readlines ())
@@ -90,11 +85,15 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
dprevpar = {l [:l .find ("=" )]:l [l .find ("=" )+ 1 :] for l in lprev }
# first copy all parameters
for st ,val in dprevpar .items ():
for st ,val in dprevpar .items ():
if prevmodel == "M0" :
nst = st .replace ("YN98" ,"YNGP_" + model )
if model [0 ]== "M" :
nst = st .replace ("YN98" ,"YNGP_" + model )
else :
nst = st .replace ("YN98" ,"DFP07" )
else :
nst = st .replace (prevmodel ,model )
if not nst in dnewpar .keys ():
dnewpar [nst ]= val
@@ -113,26 +112,47 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
if not nname in dnewpar .keys ():
dnewpar [nname ]= nval
# break
# write in backup file
if len (dnewpar )!= 0 :
fnew = open (dModelLog [model ],"w" )
for k ,v in dnewpar .items ():
fnew .write (k + "=" + v .strip ()+ "\n " )
fnew .close ()
# break
# write in backup file
if len (dnewpar )!= 0 :
fnew = open (dModelLog [model ],"w" )
for k ,v in dnewpar .items ():
fnew .write (k + "=" + v .strip ()+ "\n " )
fnew .close ()
# if M0 optimization in models, use tree optimized in M0 for subsequent model optimizations
lignore = []
if model != "M0" and "M0" in lModels :
treeFile = dModelTrees ["M0" ]+ "_1"
lignore .append ("BrLen" )
if model == "M8a" :
lignore .append ("YNGP_M8.omegas*" )
if model == "DFP07_0" :
lignore .append ("DFP07.p0_1" )
# do not re-optimize root & equilibrium if done before
if prevmodel != "" :
logger .info ("Optimization for model " + model + " does not re-optimize root frequencies" )
lignore .append ("Ancient" )
logger .info ("Optimization for model " + model + " does not re-optimize equilibrium frequencies" )
lignore .append ("*_Full.theta*" )
ignore = "," .join (lignore )
# create dictionary with all elements of the two argument lists to build commands
modelDesc = dModelSyntax [model ][0 ]+ "(" + "," .join (dModelSyntax [model ][1 :])+ ")"
dBppCmd = {"INPUTFILE" :alnFile ,
"FORMAT" :alnFormat ,
"TREEFILE" :treeFile ,
"MODEL" :modelDesc ,
"NODES" :nodes ,
"IGNORE" :ignore ,
"OUTTREE" :dModelTrees [model ],
"OUTPARAMS" :dModelParams [model ],
"BACKUP" :dModelLog [model ],
"param" :bppFile }
"FORMAT" :alnFormat ,
"TREEFILE" :treeFile ,
"MODEL" :modelDesc ,
"NODES" :nodes ,
"IGNORE" :ignore ,
"OUTTREE" :dModelTrees [model ],
"OUTPARAMS" :dModelParams [model ],
"BACKUP" :dModelLog [model ],
"param" :bppFile }
# running bppml
logger .info ("Running {:s} optimization" .format (model ))
@@ -175,6 +195,13 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
logger .info ("LRT of M8 vs M8a: {} (Treshold: {})" .format (p88a , ts88a ))
else :
logger .info ("Possible failed optimization, likelihoods have not been computed." )
if "DFP07" and "DFP07_0" in lModels :
if "DFP07" and "DFP07_0" in dLogLlh :
LRDFP , pDFP = PSPFunc .LRT (dLogLlh ["DFP07_0" ], dLogLlh ["DFP07" ], 1 )
tsDFP = 0.5 * pDFP + 0.5
logger .info ("LRT of DFP07 vs DFP07_07: {} (Treshold: {})" .format (pDFP , tsDFP ))
else :
logger .info ("Possible failed optimization, likelihoods have not been computed." )
# Bppmixedlikelihoods
"""
@@ -194,18 +221,18 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
else :
treeFile = dModelTrees [model ]+ "_1"
if model == "M0" :
if model in [ "M0" , "DFP07_0" ] :
continue
# dictionary(model:results file name)
dModelResults = {model :outSite + baseName + "_results " + model + ".log" for model in lModels }
dModelResults = {model :outSite + baseName + "_results_ " + model + ".log" for model in lModels }
dMixCmd = {"INPUTFILE" :alnFile ,
"FORMAT" :alnFormat ,
"TREEFILE" :treeFile ,
"params" :dModelParams [model ],
"OUTINFO" :dModelResults [model ],
"param" :bppMixed }
"FORMAT" :alnFormat ,
"TREEFILE" :treeFile ,
"params" :dModelParams [model ],
"OUTINFO" :dModelResults [model ],
"param" :bppMixed }
logger .info ("Running mixed likelihoods with model {:s}" .format (model ))
argsMx = "\" " + "\" \" " .join ([k + "=" + v for k , v in dMixCmd .items ()])+ "\" "