Skip to content

Commit

Permalink
Merge pull request #111 from rigdenlab/mrbump-modular
Browse files Browse the repository at this point in the history
Mrbump modular
  • Loading branch information
hlasimpk committed Jul 22, 2019
2 parents fc5bdad + 5572d2b commit 8919b6b
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 88 deletions.
93 changes: 57 additions & 36 deletions simbad/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
from pyjob.factory import TaskFactory

# Constants that need to be accessed externally (e.g. by CCP4I2)
SIMBAD_DIRNAME = "SIMBAD"
SIMBAD_PYRVAPI_SHAREDIR = "jsrview"
# Platform-dependent values: os.name == "nt" selects the Windows variant.
# NOTE(review): presumably the keyword for setting an environment variable and
# the prefix for invoking a script in generated job scripts -- confirm at the
# call sites.
EXPORT = "SET" if os.name == "nt" else "export"
CMD_PREFIX = "call" if os.name == "nt" else ""

logger = logging.getLogger(__name__)

Expand All @@ -32,28 +32,34 @@ def get_sequence(input_f, output_s):
ps.from_file(input_file=input_f)
seq_info = ps.get_sequence_info

with open(output_s, 'w') as f_out:
for i in seq_info:
f_out.write(">{}".format(i) + os.linesep)
f_out.write(seq_info[i] + os.linesep)
content = "\n".join(">{}\n{}".format(i, seq_info[i]) for i in seq_info)
with open(output_s, "w") as fh:
fh.write(content)


def get_mrbump_ensemble(mrbump_dir, final):
    """Output ensemble from mrbump directory to a dat file

    Parameters
    ----------
    mrbump_dir : str
        Path to the MrBUMP directory that is searched for the ensemble
    final : str
        Passed to ``convert_pdb_to_dat`` as its second argument

    Notes
    -----
    A missing directory or a search without matches is reported through
    ``logger.critical``; no exception is raised in either case.
    """
    if not os.path.isdir(mrbump_dir):
        logger.critical("Directory missing: %s", mrbump_dir)
        return

    pattern = os.path.join(
        mrbump_dir,
        "models",
        "domain_*",
        "ensembles",
        "gesamtEnsTrunc_*_100.0_SideCbeta.pdb",
    )
    # glob.iglob returns an iterator, which cannot be indexed with [0].
    # Build a sorted list instead so that "first match" is well defined.
    matches = sorted(glob.glob(pattern))
    if not matches:
        logger.critical("No ensemble found matching: %s", pattern)
        return
    convert_pdb_to_dat(matches[0], final)


def output_files(run_dir, result, output_pdb, output_mtz):
    """Copy the refinement output pdb/mtz of a result to the given paths

    Parameters
    ----------
    run_dir : str
        Directory containing the ``output_files`` sub-directory
    result : sequence
        Result object; only its first element (the pdb code) is used
    output_pdb : str
        Destination path for the refinement output pdb
    output_mtz : str
        Destination path for the refinement output mtz
    """
    pdb_code = result[0]
    stem = os.path.join(run_dir, "output_files", pdb_code)
    # Both inputs follow the same naming scheme and differ only in extension.
    for extension, destination in (("pdb", output_pdb), ("mtz", output_mtz)):
        source = os.path.join(
            stem, "{}_refinement_output.{}".format(pdb_code, extension)
        )
        shutil.copyfile(source, destination)

Expand All @@ -80,8 +86,7 @@ def summarize_result(results, csv_file=None, columns=None):
if df.empty:
logger.info("No results found")
else:
summary_table = "The results for this search are:\n\n%s\n"
logger.info(summary_table, df.to_string())
logger.info("The results for this search are:\n\n%s\n", df.to_string())


def tmp_dir(directory=None, prefix="tmp", suffix=""):
Expand Down Expand Up @@ -121,24 +126,36 @@ def tmp_file(delete=False, directory=None, prefix="tmp", stem=None, suffix=""):
if directory is None:
directory = tempfile.gettempdir()
if stem is None:
tmpf = tempfile.NamedTemporaryFile(delete=delete, dir=directory, prefix=prefix, suffix=suffix)
tmpf = tempfile.NamedTemporaryFile(
delete=delete, dir=directory, prefix=prefix, suffix=suffix
)
tmpf.close()
return tmpf.name
else:
tmpf = os.path.join(directory, "".join([prefix, stem, suffix]))
if not delete:
open(tmpf, 'w').close()
open(tmpf, "w").close()
return tmpf


def source_ccp4():
    """Function to return bash command to source CCP4

    Returns
    -------
    str or None
        ``"source <CCP4>/bin/ccp4.setup-sh"`` built from the ``CCP4``
        environment variable, or None when ``os.name`` is ``"nt"``

    Raises
    ------
    KeyError
        If the ``CCP4`` environment variable is not set (non-Windows only)
    """
    if os.name == "nt":
        # Explicit None keeps both return paths consistent (PEP 8).
        return None
    setup_script = os.path.join(os.environ["CCP4"], "bin", "ccp4.setup-sh")
    return "source {}".format(setup_script)


def submit_chunk(
    collector,
    run_dir,
    nproc,
    job_name,
    submit_qtype,
    submit_queue,
    permit_nonzero,
    monitor,
    success_func,
):
    """Submit jobs in small chunks to avoid using too much disk space

    Parameters
    ----------
    collector
        Script collector handed to the task factory; the length of its
        ``scripts`` attribute is used to derive the polling interval
    run_dir
        Working directory of the task (passed as ``cwd``)
    nproc
        Number of processes when ``submit_qtype`` is ``"local"``, otherwise
        the maximum array size
    job_name
        Name given to the task
    submit_qtype
        Queue type passed to ``TaskFactory``; ``"local"`` selects local
        execution
    submit_queue
        Queue passed to the task
    permit_nonzero
        Passed through to the task as ``permit_nonzero``
    monitor
        Passed to ``task.wait`` as ``monitor_f``
    success_func
        Passed to ``task.wait`` as ``success_f``
    """
    # Local runs are sized by process count, everything else by array size.
    if submit_qtype == "local":
        processes, array_size = nproc, None
    else:
        processes, array_size = None, nproc

    with TaskFactory(
        submit_qtype,
        collector,
        cwd=run_dir,
        name=job_name,
        processes=processes,
        max_array_size=array_size,
        queue=submit_queue,
        permit_nonzero=permit_nonzero,
        shell="/bin/bash",
        priority=-10,
    ) as task:
        task.run()
        # math.log(0) raises ValueError, so an empty collector falls back to
        # the minimum interval instead of crashing after submission.
        n_scripts = len(collector.scripts)
        interval = int(math.log(n_scripts) / 3) if n_scripts > 0 else 0
        # Never poll more often than every 5 seconds.
        interval_in_seconds = max(interval, 5)
        task.wait(
            interval=interval_in_seconds, monitor_f=monitor, success_f=success_func
        )
128 changes: 76 additions & 52 deletions simbad/util/mtz_util.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
"""Module for MTZ file I/O and manipulation"""

__author__ = "Adam Simpkin & Jens Thomas"
__date__ = "17 May 2017"
__version__ = "0.2"
__author__ = "Adam Simpkin & Jens Thomas & Felix Simkovic"
__date__ = "21 Jul 2019"
__version__ = "0.3"

import logging
import os
import shutil
import sys

sys.path.append(os.path.join(os.environ["CCP4"], "share", "mrbump", "include", "ccp4"))
import MRBUMP_ctruncate

from iotbx import reflection_file_reader
from iotbx.reflection_file_utils import looks_like_r_free_flags_info
from mrbump.ccp4 import MRBUMP_ctruncate
from pyjob import cexec
from pyjob.script import EXE_EXT

logger = logging.getLogger(__name__)
LOG = logging.getLogger(__name__)


def ctruncate(hklin, hklout):
Expand All @@ -31,13 +27,10 @@ def ctruncate(hklin, hklout):

ctr = MRBUMP_ctruncate.Ctruncate()

log_file = hklout.rsplit(".", 1)[0] + '.log'
log_file = hklout.rsplit(".", 1)[0] + ".log"
ctr.setlogfile(log_file)

if mtz_obj.f:
input_f = True
else:
input_f = False
input_f = bool(mtz_obj.f)

if mtz_obj.f or mtz_obj.i:
plus_minus = False
Expand Down Expand Up @@ -69,17 +62,49 @@ def ctruncate(hklin, hklout):
if mtz_obj.i and mtz_obj.f and mtz_obj.free:
shutil.copyfile(hklin, hklout)
elif mtz_obj.i and mtz_obj.free:
ctr.ctruncate(hklin, hklout, ctr_colin, ctr_colin_sig, colout="from_SIMBAD", colinFREE=mtz_obj.free,
USEINTEN=True, INPUTF=input_f, PLUSMINUS=plus_minus)
ctr.ctruncate(
hklin,
hklout,
ctr_colin,
ctr_colin_sig,
colout="from_SIMBAD",
colinFREE=mtz_obj.free,
USEINTEN=True,
INPUTF=input_f,
PLUSMINUS=plus_minus,
)
elif mtz_obj.i and not mtz_obj.free:
ctr.ctruncate(hklin, hklout, ctr_colin, ctr_colin_sig, colout="from_SIMBAD", USEINTEN=True, INPUTF=input_f,
PLUSMINUS=plus_minus)
ctr.ctruncate(
hklin,
hklout,
ctr_colin,
ctr_colin_sig,
colout="from_SIMBAD",
USEINTEN=True,
INPUTF=input_f,
PLUSMINUS=plus_minus,
)
elif mtz_obj.free:
ctr.ctruncate(hklin, hklout, ctr_colin, ctr_colin_sig, colout="from_SIMBAD", colinFREE=mtz_obj.free,
USEINTEN=False, PLUSMINUS=plus_minus)
ctr.ctruncate(
hklin,
hklout,
ctr_colin,
ctr_colin_sig,
colout="from_SIMBAD",
colinFREE=mtz_obj.free,
USEINTEN=False,
PLUSMINUS=plus_minus,
)
else:
ctr.ctruncate(hklin, hklout, ctr_colin, ctr_colin_sig, colout="from_SIMBAD", USEINTEN=False,
PLUSMINUS=plus_minus)
ctr.ctruncate(
hklin,
hklout,
ctr_colin,
ctr_colin_sig,
colout="from_SIMBAD",
USEINTEN=False,
PLUSMINUS=plus_minus,
)


def reindex(hklin, hklout, sg):
Expand Down Expand Up @@ -127,11 +152,6 @@ class GetLabels(object):
Attributes
----------
mtz_file : str
The path to the mtz file
Returns
-------
f : str
f column label
sigf : str
Expand Down Expand Up @@ -186,8 +206,8 @@ def __init__(self, mtz_file):
def run(self, mtz_file):
reflection_file = reflection_file_reader.any_reflection_file(file_name=mtz_file)
if not reflection_file.file_type() == "ccp4_mtz":
msg="File is not of type ccp4_mtz: {0}".format(mtz_file)
logging.critical(msg)
msg = "File is not of type ccp4_mtz: {0}".format(mtz_file)
LOG.critical(msg)
raise RuntimeError(msg)

miller_arrays = reflection_file.as_miller_arrays()
Expand All @@ -198,36 +218,47 @@ def run(self, mtz_file):
elif self.check_anomalous(m_a):
if self.check_for_dano_labels(m_a):
if len(m_a.info().labels) == 5:
self.f, self.sigf, self.dano, self.sigdano, isym = m_a.info().labels
self.f, self.sigf, self.dano, self.sigdano, isym = (
m_a.info().labels
)
elif len(m_a.info().labels) == 4:
self.f, self.sigf, self.dano, self.sigdano = m_a.info().labels
elif len(m_a.info().labels) == 2:
self.dano, self.sigdano = m_a.info().labels
else:
msg = "Unexpected number of columns found in anomalous miller array"
logging.debug(msg)
LOG.debug(
"Unexpected number of columns found in anomalous miller array"
)
elif self.check_for_plus_minus_labels(m_a):
if m_a.is_xray_amplitude_array():
self.fplus, self.sigfplus, self.fminus, self.sigfminus = m_a.info().labels
self.fplus, self.sigfplus, self.fminus, self.sigfminus = (
m_a.info().labels
)
elif m_a.is_xray_intensity_array():
self.iplus, self.sigiplus, self.iminus, self.sigiminus = m_a.info().labels
self.iplus, self.sigiplus, self.iminus, self.sigiminus = (
m_a.info().labels
)
else:
msg = "Type of anomalous miller array unknown"
logging.debug(msg)
LOG.debug("Type of anomalous miller array unknown")
else:
msg = "Type of anomalous miller array unknown"
logging.debug(msg)
elif m_a.is_xray_intensity_array() and len(m_a.info().labels) == 2 and not self.i:
LOG.debug("Type of anomalous miller array unknown")
elif (
m_a.is_xray_intensity_array()
and len(m_a.info().labels) == 2
and not self.i
):
self.i, self.sigi = m_a.info().labels
elif m_a.is_xray_amplitude_array() and len(m_a.info().labels) == 2 and not self.f:
elif (
m_a.is_xray_amplitude_array()
and len(m_a.info().labels) == 2
and not self.f
):
self.f, self.sigf = m_a.info().labels
else:
pass

def check_anomalous(self, miller_array):
if miller_array.anomalous_flag():
return True
elif miller_array.info().type_hints_from_file == 'anomalous_difference':
elif miller_array.info().type_hints_from_file == "anomalous_difference":
return True
# Check for anomalous miller arrays which aren't properly labeled
elif self.check_for_dano_labels(miller_array):
Expand All @@ -238,15 +269,8 @@ def check_anomalous(self, miller_array):

@staticmethod
def check_for_dano_labels(miller_array):
    """Return True if any column label contains "DANO" (case-insensitive)

    Parameters
    ----------
    miller_array
        Object whose ``info().labels`` yields the column label strings
    """
    # Generator instead of a list so any() can stop at the first match.
    return any("DANO" in label.upper() for label in miller_array.info().labels)

@staticmethod
def check_for_plus_minus_labels(miller_array):
    """Return True if any column label contains the substring "(+)"

    Parameters
    ----------
    miller_array
        Object whose ``info().labels`` yields the column label strings
    """
    # Generator instead of a list so any() can stop at the first match.
    return any("(+)" in label for label in miller_array.info().labels)

0 comments on commit 8919b6b

Please sign in to comment.