@@ -10,7 +10,6 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
logger.info("Models to be run: {:s}".format(", ".join(model for model in lModels)))
logger.info("Bppml parameter file: {:s}".format(bppFile))

nodes = PSPFunc.nbNode(treeFile, logger)
## Bppml
"""
Optimize tree and model using bppml
@@ -19,7 +18,6 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
FORMAT - format of the aln file (here, phyx)
TREEFILE - tree file for the analyzed aln
MODEL - choose which model you want run on the data YNGP_M0 through 8, same models as PAML, and DFP07 models
NODES - number of nodes in the tree file
IGNORE - parameters to ignore for optimization, for example if one is fixated (ex: omegas in M8a)
OUTTREE - name of the optimized output tree
OUTPARAMS - name of the output file summarizing parameters
@@ -49,114 +47,28 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
dModelSyntax[model].append(["p0=1","p0=0.1"][model=="DFP07"])
dLogLlh = {} # dictionary(model:logllh)

# Use previous backup file (in order M0->M1->M2->M7->M8) to accelerate optimization
# dictionary of equivalences of specific parameter names between models
dequiv={}
## omega from M0->M1->M2->M7 & M0->DFP07
dequiv["omega"] = {"M1":{"YNGP_M1.omega":"omega"},
"M2":{"YNGP_M2.omega0":"omega"},
"M0":{"YN98.omega":"omega"},
"M7":{"YNGP_M7.p":"[omega/(1-omega),1][omega==1]"},
"M8":{"YNGP_M8.p":"[omega/(1-omega),1][omega==1]"},
"DFP07_0":{"DFP07.omega":"omega"},
"DFP07":{"DFP07.omega":"omega"}}
dequiv["p0"] = {"DFP07_0":{"DFP07.p0":"1"},
"DFP07":{"DFP07.p0":"0.1"}}

for model in lModels:
### new values for parameters
dnewpar={}

if not os.path.exists(dModelLog[model]):
prevmodel = ""
if model[0]=="M":
for prevmodel in ["M7","M2","M1","M0"]:
if not prevmodel in lModels or not os.path.exists(dModelLog[prevmodel]+".def"):
prevmodel=""
else:
break
elif model[:5]=="DFP07":
for prevmodel in ["DFP07_0","M0"]:
if not prevmodel in lModels or not os.path.exists(dModelLog[prevmodel]+".def"):
prevmodel=""
else:
break


if prevmodel!="":
logger.info("Optimization for model " + model + " uses optimized parameters from model " + prevmodel)
fprev=open(dModelLog[prevmodel]+".def","r")
lprev=list(fprev.readlines())
fprev.close()

dprevpar={l[:l.find("=")]:l[l.find("=")+1:] for l in lprev}

# first copy all parameters
for st,val in dprevpar.items():
if prevmodel=="M0":
if model[0]=="M":
nst=st.replace("YN98","YNGP_"+model)
else:
nst=st.replace("YN98","DFP07")
else:
nst=st.replace(prevmodel,model)

if not nst in dnewpar.keys():
dnewpar[nst]=val

# And then for specific parameters
for key, par in dequiv.items():
if model in par.keys() and prevmodel in par.keys():
parav=par[prevmodel]
parap=par[model]
for oname,oval in dprevpar.items():
## look which oname is in equivalence list
for kparav in parav.keys():
if oname.startswith(kparav+"_"):
for npar, nexp in parap.items():
nname=oname.replace(kparav,npar)
nval=str(eval(nexp.replace(key,oval).strip()))
if True:#not nname in dnewpar.keys():
dnewpar[nname]=nval

# break
# write in backup file
logger.info(str(dnewpar))
if len(dnewpar)!=0:
fnew=open(dModelLog[model],"w")
for k,v in dnewpar.items():
fnew.write(k+"="+v.strip()+"\n")
fnew.close()

# if M0 optimization in models, use tree optimized in M0 for subsequent model optimizations
lignore=[]
if model!="M0" and "M0" in lModels:
treeFile = dModelTrees["M0"]+"_1"
lignore.append("BrLen")

if model == "M8a":
lignore.append("YNGP_M8.omegas*")

if model=="DFP07_0":
lignore.append("DFP07.p0_1")

# do not re-optimize root & equilibrium if done before
if prevmodel!="":
logger.info("Optimization for model " + model + " does not re-optimize root frequencies" )
lignore.append("Ancient")

logger.info("Optimization for model " + model + " does not re-optimize equilibrium frequencies" )
lignore.append("*_Full.theta*")
prevmodel, dnewpar = getNewParfromOptim(model, lModels, dModelLog, logger)
if prevmodel != "":
fnew=open(dModelLog[model],"w")
for k,v in dnewpar.items():
fnew.write(k+"="+v.strip()+"\n")
fnew.close()

ignore = ",".join(lignore)
lignore = setIgnoreParams(model, prevmodel, lModels, logger)
ignore = ",".join(lignore)
else:
ignore = ""

if model!="M0" and "M0" in lModels:
treeFile = dModelTrees["M0"]+"_1"

# create dictionary with all elements of the two argument lists to build commands
modelDesc=dModelSyntax[model][0]+"("+",".join(dModelSyntax[model][1:])+")"
dBppCmd = {"INPUTFILE":alnFile,
"FORMAT":alnFormat,
"TREEFILE":treeFile,
"MODEL":modelDesc,
"NODES":nodes,
"IGNORE":ignore,
"OUTTREE":dModelTrees[model],
"OUTPARAMS":dModelParams[model],
@@ -249,6 +161,104 @@ def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir, ba
logger.debug(subprocess.PIPE)


def getNewParfromOptim(model, lModels, dModelLog, logger):
    """Derive starting parameter values for `model` from an already optimized model.

    If the backup file of `model` (``dModelLog[model]``) does not exist yet, look
    for a previous model that is listed in `lModels` and whose optimized
    parameters file (``dModelLog[previous] + ".def"``) exists. Its parameters
    are copied with their names adapted to `model`, then model-specific
    parameters are recomputed from the equivalence table below.

    @param1 model: Name of the model about to be optimized (e.g. "M1", "DFP07")
    @param2 lModels: Names of all the models requested for this run
    @param3 dModelLog: Dictionary model name -> path of its backup file
    @param4 logger: Logging object
    @return prevmodel: Name of the model the parameters were taken from ("" if none)
    @return dnewpar: Dictionary parameter name -> value (as strings); copied
                     values keep the line ending they had in the .def file
    """
    # Dictionary of equivalences of specific parameter names between models:
    # parameter key -> model -> {parameter name prefix: expression}.
    # In the expression, the key is textually replaced by the previous value
    # before evaluation.
    # A conditional expression is required for M7/M8: the former
    # "[omega/(1-omega),1][omega==1]" built both list elements before indexing
    # and therefore divided by zero precisely when omega == 1.
    omega_to_p = "1 if omega==1 else omega/(1-omega)"
    dequiv = {
        ## omega from M0->M1->M2->M7 & M0->DFP07
        "omega": {"M1": {"YNGP_M1.omega": "omega"},
                  "M2": {"YNGP_M2.omega0": "omega"},
                  "M0": {"YN98.omega": "omega"},
                  "M7": {"YNGP_M7.p": omega_to_p},
                  "M8": {"YNGP_M8.p": omega_to_p},
                  "DFP07_0": {"DFP07.omega": "omega"},
                  "DFP07": {"DFP07.omega": "omega"}},
        "p0": {"DFP07_0": {"DFP07.p0": "1"},
               "DFP07": {"DFP07.p0": "0.1"}},
    }

    dnewpar = {}
    prevmodel = ""

    # Only look for a previous model when `model` has no backup file of its own.
    if not os.path.exists(dModelLog[model]):
        # Candidates are tried in this order; the first one available wins.
        if model[0] == "M":
            lcandidates = ["M7", "M2", "M1", "M0"]
        elif model[:5] == "DFP07":
            lcandidates = ["DFP07_0", "M0"]
        else:
            lcandidates = []

        for candidate in lcandidates:
            if candidate in lModels and os.path.exists(dModelLog[candidate] + ".def"):
                prevmodel = candidate
                break

    if prevmodel != "":
        logger.info("Optimization for model " + model + " uses optimized parameters from model " + prevmodel)

        # Parse "name=value" lines; lines without "=" (e.g. blank lines) are skipped
        # instead of being turned into bogus entries.
        dprevpar = {}
        with open(dModelLog[prevmodel] + ".def", "r") as fprev:
            for line in fprev:
                name, sep, val = line.partition("=")
                if sep:
                    dprevpar[name] = val

        # First copy all parameters, renaming the model part of their names.
        for st, val in dprevpar.items():
            if prevmodel == "M0":
                if model[0] == "M":
                    nst = st.replace("YN98", "YNGP_" + model)
                else:
                    nst = st.replace("YN98", "DFP07")
            else:
                nst = st.replace(prevmodel, model)

            if nst not in dnewpar:
                dnewpar[nst] = val

        # And then for specific parameters (these overwrite plain copies).
        for key, par in dequiv.items():
            if model not in par or prevmodel not in par:
                continue
            parav = par[prevmodel]
            parap = par[model]
            for oname, oval in dprevpar.items():
                ## look which oname is in equivalence list
                for kparav in parav:
                    if not oname.startswith(kparav + "_"):
                        continue
                    for npar, nexp in parap.items():
                        nname = oname.replace(kparav, npar)
                        # NOTE(review): eval() executes text read from the .def file;
                        # this is only acceptable if that file is trusted.
                        # The value is stripped first so that its line ending does
                        # not break the one-line expression.
                        dnewpar[nname] = str(eval(nexp.replace(key, oval.strip())))

    logger.debug(str(dnewpar))

    return prevmodel, dnewpar

def setIgnoreParams(model, prevmodel, lModels, logger):
    """Build the list of parameter patterns to exclude from optimization.

    @param1 model: Name of the model about to be optimized
    @param2 prevmodel: Name of the model whose parameters were reused ("" if none)
    @param3 lModels: Names of all the models requested for this run
    @param4 logger: Logging object
    @return ignored: List of parameter names/patterns to ignore
    """
    # When M0 is part of the run, every other model skips "BrLen".
    skip_brlen = "M0" in lModels and model != "M0"
    ignored = ["BrLen"] if skip_brlen else []

    # Parameter excluded for one specific model only.
    per_model = {"M8a": "YNGP_M8.omegas*", "DFP07_0": "DFP07.p0_1"}
    if model in per_model:
        ignored.append(per_model[model])

    # Nothing was reused from a previous model: frequencies stay optimizable.
    if prevmodel == "":
        return ignored

    # Do not re-optimize root & equilibrium frequencies if done before.
    prefix = "Optimization for model " + model + " does not re-optimize "
    logger.info(prefix + "root frequencies")
    ignored.append("Ancient")
    logger.info(prefix + "equilibrium frequencies")
    ignored.append("*_Full.theta*")

    return ignored


def pamlSite(alnFile, treeFile, lModels, pamlParams, outDir, baseName, logger):

tree = EvolTree(treeFile)