Skip to content

Commit

Permalink
Merge pull request #84 from rigdenlab/single_model_bfactor
Browse files Browse the repository at this point in the history
Single model bfactor & py2/3 compatibility
  • Loading branch information
hlasimpk committed Jan 31, 2021
2 parents bc4a7df + 0b3fe7d commit 2bd60be
Show file tree
Hide file tree
Showing 31 changed files with 292 additions and 156 deletions.
2 changes: 1 addition & 1 deletion ample/__main__.py
Expand Up @@ -17,5 +17,5 @@
try:
main.Ample().main()
except Exception as e:
msg = "Error running main AMPLE program: {0}".format(e.message)
msg = "Error running main AMPLE program: {0}".format(e)
exit_util.exit_error(msg, sys.exc_info()[2])
2 changes: 1 addition & 1 deletion ample/ensembler/__init__.py
Expand Up @@ -516,7 +516,7 @@ def add(self, e):

def magic(key, data):
"""Magic binning happening here"""
container = {k[key]: [] for k in zip(*data)[1]}
container = {k[key]: [] for k in list(zip(*data))[1]}
for e in data:
container[e[1][key]].append(e)
mulleimer = []
Expand Down
6 changes: 3 additions & 3 deletions ample/ensembler/_ensembler.py
Expand Up @@ -402,9 +402,9 @@ def edit_side_chains(self, raw_ensemble, side_chain_treatments=None, homologs=Fa
ensemble.pdb = fpath
ensemble.ensemble_num_atoms = natoms
# check
assert ensemble.num_residues == nresidues, "Unmatching number of residues: {0} : {1}".format(
ensemble.num_residues, nresidues
)
# assert ensemble.num_residues == nresidues, "Unmatching number of residues: {0} : {1}".format(
# ensemble.num_residues, nresidues
# )
ensembles.append(ensemble)

return ensembles
Expand Down
6 changes: 5 additions & 1 deletion ample/ensembler/abinitio.py
Expand Up @@ -7,6 +7,7 @@
import logging
import os
import shutil
import sys

from ample.ensembler import _ensembler
from ample.ensembler import cluster_util
Expand Down Expand Up @@ -250,7 +251,10 @@ def generate_ensembles_from_amoptd(self, models, amoptd):
}

# strip out any that are None
kwargs = {k: v for k, v in kwargs.iteritems() if v is not None}
if sys.version_info.major == 3:
kwargs = {k: v for k, v in kwargs.items() if v is not None}
else:
kwargs = {k: v for k, v in kwargs.iteritems() if v is not None}

ensembles = self.generate_ensembles(models, **kwargs)

Expand Down
7 changes: 5 additions & 2 deletions ample/ensembler/homologs.py
Expand Up @@ -66,7 +66,7 @@ def align_gesamt(models, gesamt_exe=None, work_dir=None):
if len(seqd) != 1:
msg = "Model {0} does not contain a single chain, got: {1}".format(*seqd.keys())
raise RuntimeError(msg)
model2chain[m] = seqd.keys()[0]
model2chain[m] = list(seqd.keys())[0]

basename = 'gesamt'
logfile = os.path.join(work_dir, 'gesamt.log')
Expand Down Expand Up @@ -229,5 +229,8 @@ def generate_ensembles_from_amoptd(self, models, amoptd):
'homolog_aligner': amoptd['homolog_aligner'],
}
# strip out any that are None
kwargs = {k: v for k, v in kwargs.iteritems() if v is not None}
if sys.version_info.major == 3:
kwargs = {k: v for k, v in kwargs.items() if v is not None}
else:
kwargs = {k: v for k, v in kwargs.iteritems() if v is not None}
return self.generate_ensembles(models, **kwargs)
138 changes: 95 additions & 43 deletions ample/ensembler/single_model.py
@@ -1,12 +1,15 @@
"""Ensembler module for single model structures"""

__author__ = "Felix Simkovic, and Jens Thomas"
__author__ = "Felix Simkovic, Jens Thomas and Adam Simpkin"
__date__ = "16 Feb 2016"
__version__ = "1.0"

import gemmi
import logging
import math
import os
import pandas as pd
import sys

from ample.ensembler import _ensembler, truncation_util
from ample.ensembler.constants import SIDE_CHAIN_TREATMENTS
Expand Down Expand Up @@ -57,17 +60,16 @@ def generate_ensembles(
logger.critical(msg)
raise RuntimeError(msg)

if len(truncation_scorefile_header) < 2:
msg = "At least two header options for scorefile are required"
logger.critical(msg)
raise RuntimeError(msg)

# standardise the structure
std_models_dir = os.path.join(self.work_dir, "std_models")
os.mkdir(std_models_dir)

std_model = ample_util.filename_append(models[0], 'std', std_models_dir)
pdb_edit.standardise(pdbin=models[0], pdbout=std_model, del_hetatm=True)
pdb_edit.standardise(pdbin=models[0], pdbout=std_model)

if truncation_method == truncation_util.TRUNCATION_METHODS.ERRORS:
self._modify_bfactors(std_model, std_model)

std_models = [std_model]
logger.info('Standardised input model: %s', std_models[0])

Expand All @@ -79,47 +81,71 @@ def generate_ensembles(
if not os.path.isdir(truncate_dir):
os.mkdir(truncate_dir)

# Read all the scores into a per residue dictionary
assert len(truncation_scorefile_header) > 1, "At least two column labels are required"
residue_scores = self._read_scorefile(truncation_scorefile)
residue_key = truncation_scorefile_header.pop(0)
truncation_scorefile_header = map(str.strip, truncation_scorefile_header)
assert all(
h in residue_scores[0] for h in truncation_scorefile_header
), "Not all column labels are in your CSV file"
if truncation_method == truncation_util.TRUNCATION_METHODS.SCORES:
if len(truncation_scorefile_header) < 2:
msg = "At least two header options for scorefile are required"
logger.critical(msg)
raise RuntimeError(msg)

# Read all the scores into a per residue dictionary
assert len(truncation_scorefile_header) > 1, "At least two column labels are required"
residue_scores = self._read_scorefile(truncation_scorefile)
residue_key = truncation_scorefile_header.pop(0)
truncation_scorefile_header = list(map(str.strip, truncation_scorefile_header))
assert all(
h in residue_scores[0] for h in truncation_scorefile_header
), "Not all column labels are in your CSV file"
self.ensembles = []
for score_key in truncation_scorefile_header:
self._generate_ensembles(residue_key, score_key, residue_scores, truncate_dir, std_models,
truncation_method, percent_truncation, percent_fixed_intervals,
truncation_pruning, side_chain_treatments)

if truncation_method in [truncation_util.TRUNCATION_METHODS.BFACTORS, truncation_util.TRUNCATION_METHODS.ERRORS]:
struct = gemmi.read_structure(std_model)
residue_scores = []
residue_key = "Residue"
score_key = "Bfactor"
for chain in struct[0]:
for residue in chain:
residue_scores.append({residue_key: residue.seqid.num, score_key: residue[0].b_iso})
self._generate_ensembles(residue_key, score_key, residue_scores, truncate_dir, std_models,
truncation_method, percent_truncation, percent_fixed_intervals,
truncation_pruning, side_chain_treatments)

return self.ensembles

def _generate_ensembles(self, residue_key, score_key, residue_scores, truncate_dir, std_models, truncation_method,
percent_truncation, percent_fixed_intervals, truncation_pruning, side_chain_treatments):
zipped_scores = self._generate_residue_scorelist(residue_key, score_key, residue_scores)
score_truncate_dir = os.path.join(truncate_dir, "{}".format(score_key))
if not os.path.isdir(score_truncate_dir):
os.mkdir(score_truncate_dir)
self.ensembles = []
for score_key in truncation_scorefile_header:
zipped_scores = self._generate_residue_scorelist(residue_key, score_key, residue_scores)
score_truncate_dir = os.path.join(truncate_dir, "{}".format(score_key))
if not os.path.isdir(score_truncate_dir):
os.mkdir(score_truncate_dir)

self.truncator = truncation_util.Truncator(work_dir=score_truncate_dir)
self.truncator.theseus_exe = self.theseus_exe
for truncation in self.truncator.truncate_models(
self.truncator = truncation_util.Truncator(work_dir=score_truncate_dir)
self.truncator.theseus_exe = self.theseus_exe
for truncation in self.truncator.truncate_models(
models=std_models,
truncation_method=truncation_method,
percent_truncation=percent_truncation,
percent_fixed_intervals=percent_fixed_intervals,
truncation_pruning=truncation_pruning,
residue_scores=zipped_scores,
):

pre_ensemble = _ensembler.Ensemble()
pre_ensemble.num_residues = truncation.num_residues
pre_ensemble.truncation_dir = truncation.directory
pre_ensemble.truncation_level = truncation.level
pre_ensemble.truncation_method = truncation.method
pre_ensemble.truncation_percent = truncation.percent
pre_ensemble.truncation_residues = truncation.residues
pre_ensemble.truncation_variance = truncation.variances
pre_ensemble.truncation_score_key = score_key.lower()
pre_ensemble.pdb = truncation.models[0]

for ensemble in self.edit_side_chains(pre_ensemble, side_chain_treatments, single_structure=True):
self.ensembles.append(ensemble)

return self.ensembles
):

pre_ensemble = _ensembler.Ensemble()
pre_ensemble.num_residues = truncation.num_residues
pre_ensemble.truncation_dir = truncation.directory
pre_ensemble.truncation_level = truncation.level
pre_ensemble.truncation_method = truncation.method
pre_ensemble.truncation_percent = truncation.percent
pre_ensemble.truncation_residues = truncation.residues
pre_ensemble.truncation_variance = truncation.variances
pre_ensemble.truncation_score_key = score_key.lower()
pre_ensemble.pdb = truncation.models[0]

for ensemble in self.edit_side_chains(pre_ensemble, side_chain_treatments, single_structure=True):
self.ensembles.append(ensemble)

def generate_ensembles_from_amoptd(self, models, amoptd):
"""Generate ensembles from data in supplied ample data dictionary."""
Expand All @@ -132,9 +158,35 @@ def generate_ensembles_from_amoptd(self, models, amoptd):
'truncation_scorefile': amoptd['truncation_scorefile'],
'truncation_scorefile_header': amoptd['truncation_scorefile_header'],
}
kwargs = {k: v for k, v in kwargs.iteritems() if v is not None}
if sys.version_info.major == 3:
kwargs = {k: v for k, v in kwargs.items() if v is not None}
else:
kwargs = {k: v for k, v in kwargs.iteritems() if v is not None}
return self.generate_ensembles(models, **kwargs)

@staticmethod
def _modify_bfactors(pdbin, pdbout):
    """Convert per-atom error estimates stored in the B-factor column
    into crystallographic B-factors.

    The incoming B-factor column is treated as an RMS coordinate error
    (presumably in Angstrom -- TODO confirm against the caller that
    produces these models). Each atom's B-factor is replaced with
    ``8/3 * pi^2 * rms^2``, capped at 999.0 (the fixed-width PDB column
    maximum), and its occupancy is set to a 0..1 weight that ramps down
    linearly for errors larger than ``rms_big``.

    :param pdbin: path to the input PDB file
    :param pdbout: path to the output PDB file (may be the same as ``pdbin``)
    """
    multiplier = 8.0 / 3.0 * math.pi ** 2
    bmax = 999.0  # largest value that fits the PDB B-factor column
    rms_max = math.sqrt(bmax / multiplier)  # error that maps to the capped B-factor
    rms_big = 4.0  # errors above this start reducing the occupancy

    struct = gemmi.read_structure(pdbin)
    for model in struct:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    # Occupancy from the ORIGINAL b_iso (the error), before
                    # it is overwritten below: 1.0 for small errors, linear
                    # fall-off to 0.0 as the error approaches rms_max.
                    atom.occ = max(min(1.0 - (atom.b_iso - rms_big) / (rms_max - rms_big), 1.0), 0.0)
                    # B = 8/3 * pi^2 * <u^2>, capped at the column maximum.
                    atom.b_iso = min(multiplier * atom.b_iso ** 2, bmax)

    # Drop ANISOU records: they would contradict the rewritten isotropic
    # B-factors.
    pdb_lines = [line for line in struct.make_minimal_pdb().split('\n') if not line.startswith('ANISOU')]
    # Write '\n', not os.linesep: the file is opened in text mode, so
    # os.linesep would be translated again and produce '\r\r\n' on Windows.
    with open(pdbout, "w") as f_out:
        f_out.write('\n'.join(pdb_lines) + '\n')

@staticmethod
def _generate_residue_scorelist(residue_key, score_key, scores):
"""Generate a zipped list of residue indexes and corresponding scores
Expand All @@ -160,4 +212,4 @@ def _read_scorefile(scorefile):
"""
df = pd.read_csv(scorefile)
df.rename(columns=lambda x: x.strip(), inplace=True)
return df.T.to_dict().values()
return list(df.T.to_dict().values())
1 change: 1 addition & 0 deletions ample/ensembler/subcluster.py
Expand Up @@ -318,6 +318,7 @@ def _generate_distance_matrix_generic(self, models, purge=True, purge_all=False,
cmd = [self.executable, '--make-archive', garchive, '-pdb', mdir]
# cmd += [ '-nthreads=auto' ]
cmd += ['-nthreads={0}'.format(self.nproc)]
print(" ".join(cmd))
# HACK FOR DYLD!!!!
env = None
# env = {'DYLD_LIBRARY_PATH' : '/opt/ccp4-devtools/install/lib'}
Expand Down
2 changes: 1 addition & 1 deletion ample/ensembler/tests/test_cluster_util.py
Expand Up @@ -7,7 +7,7 @@

class Test(unittest.TestCase):
def test_random_cluster(self):
models = ["{0}foo{0}bar{0}model_{1}.pdb".format(os.sep, i) for i in xrange(1000)]
models = ["{0}foo{0}bar{0}model_{1}.pdb".format(os.sep, i) for i in range(1000)]
########################################################################
# Test Case 1
max_cluster_size = 200
Expand Down
2 changes: 1 addition & 1 deletion ample/ensembler/tests/test_single_model.py
Expand Up @@ -8,7 +8,7 @@ class Test(unittest.TestCase):
def test_generateResidueScorelist(self):
scores = [
{'residue': (i), 'concoord': (i * 0.5 * 3.452), 'rosetta': (i * 3.452), 'unknown': (i * 1.11 * 3.452)}
for i in xrange(1, 11)
for i in range(1, 11)
]
zipped_concoord = single_model.SingleModelEnsembler._generate_residue_scorelist('residue', 'concoord', scores)
ref_concoord = [
Expand Down
2 changes: 1 addition & 1 deletion ample/ensembler/tests/test_subcluster.py
Expand Up @@ -26,7 +26,7 @@ def test_radius_cctbx(self):
clusterer.generate_distance_matrix(pdb_list)
cluster_files1 = [os.path.basename(x) for x in clusterer.cluster_by_radius(radius)]
ref = ['1_S_00000002.pdb', '1_S_00000004.pdb']
self.assertItemsEqual(ref, cluster_files1)
self.assertEqual(ref, cluster_files1)

@unittest.skipUnless(test_funcs.found_exe("gesamt" + ample_util.EXE_EXT), "gesamt exec missing")
def test_gesamt_matrix_generic(self):
Expand Down
10 changes: 5 additions & 5 deletions ample/ensembler/tests/test_truncation_util.py
Expand Up @@ -26,18 +26,18 @@ def setUpClass(cls):
cls.theseus_exe = ample_util.find_exe('theseus' + ample_util.EXE_EXT)

def test_convert_residue_scores(self):
residue_scores = [(i, 0.1 * i) for i in xrange(1, 11)]
residue_scores = [(i, 0.1 * i) for i in range(1, 11)]
# ensembler = _ensembler.Ensembler()
# scores = ensembler._convert_residue_scores(residue_scores)
scores = truncation_util.Truncator._convert_residue_scores(residue_scores)
score_idxs = [i.idx for i in scores]
ref_score_idxs = [i for i in xrange(10)] # Minus one compared to org data
ref_score_idxs = [i for i in range(10)] # Minus one compared to org data
self.assertEqual(ref_score_idxs, score_idxs)
score_resSeq = [i.resSeq for i in scores]
ref_score_resSeq = [i for i in xrange(1, 11)] # Same i as org data
ref_score_resSeq = [i for i in range(1, 11)] # Same i as org data
self.assertEqual(ref_score_resSeq, score_resSeq)
score_variances = [i.variance for i in scores]
ref_score_variances = [(0.1 * i) for i in xrange(1, 11)] # Same i as org data
ref_score_variances = [(0.1 * i) for i in range(1, 11)] # Same i as org data
self.assertEqual(ref_score_variances, score_variances)

def test_residuesFocussed(self):
Expand All @@ -47,7 +47,7 @@ def test_residuesFocussed(self):
for i in range(l)
]

ref_tlevels = [100, 93, 85, 78, 70, 63, 55, 48, 40, 33, 25, 23, 20, 18, 15, 13, 10, 8, 5, 3]
ref_tlevels = [100, 92, 85, 78, 70, 62, 55, 48, 40, 32, 25, 22, 20, 18, 15, 12, 10, 8, 5, 2]
ref_tvariances = [
160.0,
148.0,
Expand Down

0 comments on commit 2bd60be

Please sign in to comment.