Skip to content
Permalink
Browse files

Merge branch 'master' of https://github.com/lgueguen/DGINN

  • Loading branch information
lgueguen committed Jan 19, 2021
2 parents ea4dba8 + 3be8a7d commit 48e64bbd59d5c922a6dea030c6e1eaf1d6575166
Showing with 187 additions and 178 deletions.
  1. +8 −5 DGINN.py
  2. +4 −3 etc/parseResFunc.py
  3. +2 −3 examples/parameters_possel.txt
  4. +6 −23 lib/AnalysisFunc.py
  5. +44 −45 lib/Init.py
  6. +38 −41 lib/PosSelFunc.py
  7. +85 −58 lib/SiteAnalysis.py
@@ -137,8 +137,8 @@
parameters["duplication"])

dAlTree = AnalysisFunc.phyMLTree(Data,
parameters["phymlOpt"])

parameters["phymlOpt"])
elif lSteps[i] == "duplication" and parameters["duplication"]:
if parameters["step"] == "duplication":
Data, dAlTree = LoadFileFunc.duplPSEntry(Data)
@@ -149,6 +149,7 @@
firstStep,
parameters["duplication"])
dAlTree[Data.aln]=dTree

dAlTree = AnalysisFunc.checkPhyMLTree(Data,
dAlTree,
parameters["nbspecies"],
@@ -175,13 +176,15 @@
parameters)

listArgsPosSel = []
fAT= open(Data.o+"files_list.txt", "w")
for aln in dAlTree:
listArgs = [Data,
parameters,
aln,
dAlTree[aln]]
parameters,
aln,
dAlTree[aln]]
listArgsPosSel.append(listArgs)


with open(Data.o+"files_list.txt", "w") as fAT:
if len(dAlTree[aln]): # only if it exists
fAT.write(aln+"\t"+dAlTree[aln])
@@ -133,11 +133,11 @@ def ResBpp(baseName, posDir, pr):
dSA = {}
dSAres = {}
dLogLlh = {}
lModels = ["M1","M2","M7","M8"]
lModels = ["M1","M2","M7","M8", "DFP07_0", "DFP07"]

for model in lModels:
dSA[model] = posDir+"/bpp_site/"+baseName+"_"+model+".params"
dSAres[model] = posDir+"/bpp_site/"+baseName+"_results"+model+".log"
dSAres[model] = posDir+"/bpp_site/"+baseName+"_results_"+model+".log"
if os.path.exists(dSA[model]):
with open(dSA[model], "r") as params:
dLogLlh[model] = float(params.readline().strip().split("= ")[-1])
@@ -147,8 +147,9 @@ def ResBpp(baseName, posDir, pr):

m1m2 = ResBppExtract("M1 M2", dLogLlh, dSAres, posDir, baseName, pr)
m7m8 = ResBppExtract("M7 M8", dLogLlh, dSAres, posDir, baseName, pr)
dfp = ResBppExtract("DFP07_0 DFP07", dLogLlh, dSAres, posDir, baseName, pr)

return(m1m2, m7m8, dLogLlh)
return(m1m2, m7m8, dfp, dLogLlh)

def ResPamlExtract(models, dModelLlh, dModelFile, pr):
model1, model2 = models.split(" ")[0], models.split(" ")[1]
@@ -94,9 +94,8 @@ paml:False
# Can also be used to indicate the path to a BIO++ parameter file
bppml:True

# Same as previously, but for extracting results from the results
# computed from bppml
mixedlikelihood:True
# Same as previously, but for extracting results from the results computed from bppml
mixedlikelihood: True

# Option for using BIO++ for the detection of branches under positive selection
# If True, parameter file will be automatically generated
@@ -411,31 +411,13 @@ def runGARD(aln, o, hostFile, logger):
outGard = gardRes+"_out"
errGard = gardRes+"_err"

# valid for hyphy 2.3
batchFile = o+aln.split("/")[-1].split(".")[0]+"_gard.bf"
with open(batchFile, "w") as bf:
bf.write("inputRedirect = {};\n")
bf.write("inputRedirect[\"01\"] = \"{:s}\";\n".format(aln))
bf.write("inputRedirect[\"02\"] = \"010010\";\n")
bf.write("inputRedirect[\"03\"] = \"None\";\n")
bf.write("inputRedirect[\"04\"] = \"{:s}\";\n".format(gardRes))
bf.write("ExecuteAFile(HYPHY_LIB_DIRECTORY + \"TemplateBatchFiles\" + DIRECTORY_SEPARATOR + \"GARD.bf\", inputRedirect);\n")
bf.close()
logger.debug("Batch file: {:s}".format(batchFile))

"""
#for hyphy 2.5
if hostFile == "":
cmd = "mpirun -np 2 HYPHYMPI GARD --type codon --model HKY --alignment {:s} --output {:s} --output-lf {:s}".format(aln, gardRes, gardJson)
cmd = "mpirun -np 4 HYPHYMPI GARD --alignment {:s} --output {:s} --output-lf {:s}".format(aln, gardJson, gardRes)
else:
cmd = "mpirun -np 2 -hostfile {:s} HYPHYMPI GARD --type codon --model HKY --alignment {:s} --output {:s} --output-lf {:s}".format(hostfile, aln, gardRes, gardJson)
"""

if hostFile == "":
cmd = "mpirun -np 4 -merge-stderr-to-stdout -output-filename {:s} HYPHYMPI {:s} ".format(o + "errgard", batchFile)
else:
cmd = "mpirun -np 4 -hostfile {:s} -merge-stderr-to-stdout -output-filename {:s} HYPHYMPI {:s}".format(hostFile, o + "errgard", batchFile)

cmd = "mpirun -np 4 -hostfile {:s} HYPHYMPI GARD --alignment {:s} --output {:s} --output-lf {:s}".format(hostfile, aln, gardJson, gardRes)

lCmd = shlex.split(cmd)
with open(outGard, "w") as o, open(errGard, "w") as e:
runGARD = subprocess.run(lCmd, shell=False, check=True, stdout=o, stderr=e)
@@ -446,6 +428,7 @@ def runGARD(aln, o, hostFile, logger):
return(gardJson)



def procGARD(gardRes, aln):
"""
Function creating the batch file to run GARD processor for KH test (Kosakovsky Pond et al., 2006).
@@ -542,9 +525,9 @@ def parseGard(kh, aln, o, logger):
for x in range(1,len(lBP)):
dFrag = {}
if lBP[x-1] == 0:
extension = "{:d}to{:d}".format(lBP[x-1], lBP[x])
extension = "_{:d}_{:d}".format(lBP[x-1], lBP[x])
else:
extension = "{:d}to{:d}".format(lBP[x-1]-1, lBP[x])
extension = "_{:d}_{:d}".format(lBP[x-1]-1, lBP[x])

outFrag = o+aln.split("/")[-1].split(".")[0]+"_frag"+extension+".best.fas"
for name in lNameGene:
@@ -112,7 +112,7 @@ def paramDef(params, inf, queryName, outdir):
else:
dParams[temp[0]] = temp[1].strip()
content.close()

#If infile(s) given through command line, takes priority
if inf != "":
dParams["infile"] = list(map(str.strip,inf.split(",")))
@@ -160,15 +160,15 @@ def paramDef(params, inf, queryName, outdir):

#Check if parameters are correct
lSteps = ["blast",
"accessions",
"fasta",
"orf",
"alignment",
"tree",
"duplication",
"recombination",
"positiveSelection",
""]
"accessions",
"fasta",
"orf",
"alignment",
"tree",
"duplication",
"recombination",
"positiveSelection",
""]

if "step" not in dParams or dParams["step"] not in lSteps:
print("Step \""+dParams["step"]+"\" not available, set to blast by default.")
@@ -192,7 +192,6 @@ def paramDef(params, inf, queryName, outdir):
elif "alnfile" not in dParams or dParams["alnfile"] == "":
print("The pipeline requires a codon alignment. Please provide one.")
sys.exit()

for opt in ["meme", "busted", "models", "paml", "bppml", "mixedlikelihood", "opb", "gnh"]:
if opt not in dParams:
dParams[opt] = ""
@@ -210,7 +209,7 @@ def paramDef(params, inf, queryName, outdir):
for M in map(str.strip,dParams[opt].split(",")):
if M == "":
next
elif M not in ["M0", "M1", "M2", "M7", "M8", "M8a"]:
elif M not in ["M0", "M1", "M2", "M7", "M8", "M8a", "DFP07_0", "DFP07"]:
print(M + " isn't a valid model.")
else:
ltemp.append(M)
@@ -227,39 +226,39 @@ def paramDef(params, inf, queryName, outdir):

#Creation of a dictionnary with all the parameters
defaultParam = {"infile":"",
"queryName":"",
"queryFile":"",
"blastdb":"",
"outdir":"",
"logfile":"",
"evalue":1e-3,
"mincov":50,
"percID":70,
"maxLen":"cutoff",
"entryQuery":"",
"APIKey":"",
"phymlOpt":"",
"sptree":"",
"duplication":False,
"LBopt":"cutoff",
"nbspecies":8,
"recombination":False,
"remote":False,
"step":"blast",
"positiveSelection":False,
"alnfile":"",
"treefile":"",
"alnformat":"Fasta",
"basename":"",
"hyphySeuil":0.05,
"busted":False,
"meme":False,
"models":"",
"paml":"",
"bppml":"",
"mixedlikelihood":"",
"opb":False,
"gnh":False}
"queryName":"",
"queryFile":"",
"blastdb":"",
"outdir":"",
"logfile":"",
"evalue":1e-3,
"mincov":50,
"percID":70,
"maxLen":"cutoff",
"entryQuery":"",
"APIKey":"",
"phymlOpt":"",
"sptree":"",
"duplication":False,
"LBopt":"cutoff",
"nbspecies":8,
"recombination":False,
"remote":False,
"step":"blast",
"positiveSelection":False,
"alnfile":"",
"treefile":"",
"alnformat":"Fasta",
"basename":"",
"hyphySeuil":0.05,
"busted":False,
"meme":False,
"models":"",
"paml":"",
"bppml":"",
"mixedlikelihood":"",
"opb":False,
"gnh":False}

for i in defaultParam:
if i in dParams.keys() and dParams[i] != "":
@@ -41,12 +41,12 @@ def pspAnalysis(data, parms, aln, tree):
logger.info("Analysis to be run:")

dAnalysis = {"paml": "Site (codeml)",
"BUSTED":"Whole-Gene",
"bppml":"Site (Bio++ - Optimization)",
"bppmixedlikelihood":"Site (Bio++ - Results)",
"OPB":"Branch",
"GNH":"Branch-site on positively selected branches",
"MEME":"Branch-site"}
"BUSTED":"Whole-Gene",
"bppml":"Site (Bio++ - Optimization)",
"bppmixedlikelihood":"Site (Bio++ - Results)",
"OPB":"Branch",
"GNH":"Branch-site on positively selected branches",
"MEME":"Branch-site"}
for key in dCtrls.keys():
logger.info(dAnalysis[key])

@@ -60,42 +60,39 @@ def pspAnalysis(data, parms, aln, tree):
if "MEME" in dCtrls:
try:
BranchAnalysis.memeBranchSite(aln,
cladoFile,
outDir,
data.baseName,
logger)
cladoFile,
outDir,
data.baseName,
logger)
except Exception:
logger.error("MEME encountered an unexpected error, skipping.")

if "bppml" and "bppmixedlikelihood" in dCtrls and len(lModels) > 1:
#try:
SiteAnalysis.bppSite(dCtrls["bppml"],
dCtrls["bppmixedlikelihood"],
aln,
data.alnFormat,
tree,
lModels,
outDir,
data.baseName,
logger)
#except Exception:
#logger.error("Bio++ Site encountered an unexpected error, skipping.")
elif "bppml" or "bppmixedlikelihood" not in dCtrls:
logger.error("Part of parameters for Bio++ site analysis are completed but not all.")
logger.error("Analysis ignored (if unexpected, check paths to Bio++/bpp parameter files).")
elif "bppml" and "bppmixedlikelihood" not in dCtrls:
next
if "bppml" in dCtrls:
# try:
if not dCtrls["bppmixedlikelihood"]:
dCtrls["bppmixedlikelihood"]=dCtrls["bppml"]
SiteAnalysis.bppSite(dCtrls["bppml"],
dCtrls["bppmixedlikelihood"],
aln,
data.alnFormat,
tree,
lModels,
outDir,
data.baseName,
logger)
# except Exception:
# logger.error("Bio++ Site encountered an unexpected error, skipping.")

lPSNodes = []
if "OPB" in dCtrls:
try:
params = BranchAnalysis.bppBranch(dCtrls["OPB"],
outDir,
data.baseName,
aln,
data.alnFormat,
tree,
logger)
outDir,
data.baseName,
aln,
data.alnFormat,
tree,
logger)
except Exception:
logger.error("Bio++ Branch Analysis encountered an unexpected error, skipping.")
try:
@@ -109,14 +106,14 @@ def pspAnalysis(data, parms, aln, tree):
except Exception:
logger.error("Bio++ Pseudo Branch-Site Analysis encountered an unexpected error, skipping.")

if "paml" in dCtrls and dCtrls["paml"] not in ["False", False] and len(lModels) > 1:
if "paml" in dCtrls:
SiteAnalysis.pamlSite(aln,
tree,
lModels,
dCtrls["paml"],
outDir,
data.baseName,
logger)
tree,
lModels,
dCtrls["paml"],
outDir,
data.baseName,
logger)
"""try:
SiteAnalysis.pamlSite(aln, tree, lModels, dCtrls["paml"], outDir, data.baseName, logger)
except Exception:

0 comments on commit 48e64bb

Please sign in to comment.