Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate conformation libraries from BCE output #135

Merged
merged 34 commits into from
Apr 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
2d62238
Add helper methods to Dihedral object
cescgina Oct 27, 2020
fff3969
Add options to main to run dihedral calculations
cescgina Oct 27, 2020
f0e2a72
Add default path to dihedral library file
cescgina Oct 27, 2020
6717f0c
Add module to calculate dihedral angles
cescgina Oct 27, 2020
93fee12
Move rdkit dependencies into its toolkit
cescgina Oct 28, 2020
d7ebbe1
Change units of dihedral calculation to rad
cescgina Oct 30, 2020
0b74674
Merge branch 'devel' into calc_dihedrals
cescgina Nov 3, 2020
69ceffb
Merge branch 'devel' into calc_dihedrals
cescgina Nov 27, 2020
4720fe8
Remove duplicated method from merge
cescgina Nov 27, 2020
8a3b62a
Fix discrepancy in name of OPLS charges option
cescgina Nov 27, 2020
23bdca8
Udpate format of Dihedral library due to new changes
cescgina Nov 27, 2020
588e8ec
Revert changes in dihedral library format
cescgina Nov 27, 2020
c2c666a
Sort the output of glob to get consistent output
cescgina Nov 27, 2020
0724711
Merge branch 'devel' into calc_dihedrals
cescgina Nov 28, 2020
3b6bea3
Expose charge calculator types to main for consitency
cescgina Nov 28, 2020
dcb2e19
Merge branch 'devel' into calc_dihedrals
cescgina Nov 30, 2020
c2ec73b
Merge branch 'devel' into calc_dihedrals
cescgina Dec 2, 2020
4e1aff9
Add function to calculate RMSD in RDKIT toolkit
cescgina Dec 7, 2020
24a0386
Modify logger level inside BCEDihedrals
cescgina Dec 7, 2020
9122c85
Add new mode in BCEDihedrals
cescgina Dec 7, 2020
39e8c40
Raise more informative error for missing BCEDihedrals
cescgina Dec 23, 2020
8677dda
Avoid calculating rotamer libraries with BCE
cescgina Dec 23, 2020
0bc8193
Avoid logging rotamer library if not calculated
cescgina Dec 24, 2020
06140f6
Do not avoid reading dihedrals with hydrogens
cescgina Jan 5, 2021
f2139b3
Merge branch 'devel' into calc_dihedrals
cescgina Feb 15, 2021
f0dfa8f
Add tests for outputHandler in the dihedral library
cescgina Feb 17, 2021
8d1b214
Pass dihedrals_mode argument to main
cescgina Feb 17, 2021
9662430
Test parsing dihedral library parameters in main
cescgina Feb 17, 2021
88754d3
Add tests for new functions in rdkit toolkit
cescgina Feb 17, 2021
d118a9b
Add tests for BCEDihedrals module
cescgina Feb 17, 2021
68e206c
Switch from calculating dihedrals to translations
cescgina Mar 11, 2021
fc85440
Update test for BCEConformations
cescgina Mar 15, 2021
d1565ee
Merge branch 'devel' into calc_dihedrals
cescgina Mar 30, 2021
a074110
Use parameterize_opls2005 function to avoid errors
cescgina Apr 7, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 170 additions & 0 deletions peleffy/BCEConformations/BCEConformations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
"""
This module contains classes and functions involved in the
creation of a library of dihedral angles from the output of the
BCE server
"""
import os
import glob
import numpy as np
import networkx as nx
import peleffy
from peleffy.utils import Logger
from peleffy.template import impact
from peleffy.topology import molecule
from peleffy.utils.toolkits import RDKitToolkitWrapper

class BCEConformations(object):
"""
A class to produce a library of conformations from the output
of the BCE server
"""
def __init__(self, topology_obj, bce_output_path, verbose=False):
"""
Initializes a BCEConformations object

Parameters
----------
topology_obj : An peleffy.topology.Topology
A Topology object that contains the ligand's information
bce_output_path: str
Path where the output from the BCE server is stored

"""
self._topology = topology_obj
self._molecule = topology_obj.molecule
self.bce_path = bce_output_path
self.conformations_library = {}
self.verbose = verbose

def calculate(self):
"""
Calculate conformation library from the bce output
"""
logger = Logger()
logger.info(' - Calculating conformation library')
if not self.verbose:
logger.set_level('WARNING')
self._calculate_all_conformations()

def _calculate_all_conformations(self):
clusters = sorted(glob.glob(os.path.join(self.bce_path, "CLUSTERS", "CL*", "cluster*.min.imaged.pdb")))
if not clusters:
raise ValueError("Path to the BCE output does not contain a CLUSTERS folder, please check if the path is correct!")
clusters_order = self.order_clusters_min_distances(clusters)
ordered_clusters = [clusters[x] for x in clusters_order]
for cluster in ordered_clusters:
self.calculate_cluster_offsets(cluster)


def calculate_cluster_offsets(self, cluster_pdb):
"""
Calculate dihedral angles from pdb

Parameters
----------
cluster_pdb: str
Path to the cluster representative conformation
"""
rdkit_wrapper = RDKitToolkitWrapper()
conformation_offsets = []
# use the input molecule as template since the cluster structures
# probably will not have proper stereochemistry
mol = molecule.Molecule(cluster_pdb, connectivity_template=self._molecule.rdkit_molecule)
cluster_coordinates = mol.get_conformer()
topology_to_cluster = {i: x for i, x in enumerate(rdkit_wrapper.get_substruct_match(mol, self._molecule))}
cluster_to_topology = {v: k for k, v in topology_to_cluster.items()}
template = impact.Impact(self._topology)
root_coordinates = cluster_coordinates[topology_to_cluster[find_index_root(template.topology, self._topology)]]
for i, coords in enumerate(cluster_coordinates):
conformation_offsets.append([self._topology.atoms[cluster_to_topology[i]].PDB_name]+(coords-root_coordinates).tolist())
self.conformations_library[cluster_pdb] = conformation_offsets

def save(self, output_path):
"""
Save the conformation library

Parameters
----------
output_path: str
Where to save the conformation library
"""
with open(output_path, "w") as fw:
fw.write("* CONFORMATIONS LIBRARY FILE\n")
fw.write('* File generated with peleffy-{}\n'.format( peleffy.__version__))
for conformation_file, conformation_values in self.conformations_library.items():
fw.write("* File: {:s}\n".format(conformation_file))
fw.write("{:s} {:d} {:d}\n".format(self._molecule.tag.upper(), len(conformation_values), len(self.conformations_library)))
for values in conformation_values:
fw.write("{:s} {:5f} {:5f} {:5f}\n".format(*values))
fw.write("ENDCONFORMATION\n")
fw.write("END\n")

def order_clusters_min_distances(self, clusters):
"""
Order a list of structures in a way that jumps to the next structure
involves the minimum change

Parameters
----------

clusters: list
Path to the structures
"""
rdkit_wrapper = RDKitToolkitWrapper()
distances = np.zeros((len(clusters), len(clusters)))
cluster_molecules = []
for cluster in clusters:
# use the input molecule as template since the cluster structures
# probably will not have proper stereochemistry
cluster_molecules.append(molecule.Molecule(cluster, connectivity_template=self._molecule.rdkit_molecule))
for i, cluster_mol in enumerate(cluster_molecules):
for j, cluster_mol_2 in enumerate(cluster_molecules[i+1:], start=i+1):
rmsd_value = rdkit_wrapper.get_rmsd(cluster_mol, cluster_mol_2)
distances[i, j] = rmsd_value
distances[j, i] = rmsd_value
return find_optimal_path_from_matrix(distances)


def find_optimal_path_from_matrix(distances):
"""
Find a Minimum Spanning Tree from a distance matrix. It uses networkx to
create the graph and the MST

Parameters
----------

Returns
-------
min_path : list
Nodes on the graph that minimise heuristically the total distance
"""
graph = nx.from_numpy_matrix(distances)
min_dist = 1e6
min_path = None
for node in graph:
dist, path = find_heuristic_path(graph.copy(), distances, node)
if dist < min_dist:
min_dist = dist
min_path = path
return min_path

def find_heuristic_path(graph, distances, start_node):
n = distances.shape[0]
path = [start_node]
dist = 0
while len(path) < n:
node = path[-1]
new_neighbor = min(graph[node].items(), key=lambda x: x[1]["weight"])
graph.remove_node(node)
path.append(new_neighbor[0])
dist += new_neighbor[1]['weight']
dist += distances[path[-1], start_node]
return dist, path


def find_index_root(sorted_topology, topology):
atom_name = sorted_topology.atoms[0].PDB_name
for i, atom in enumerate(topology.atoms):
if atom.PDB_name == atom_name:
return i
return None
1 change: 1 addition & 0 deletions peleffy/BCEConformations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .BCEConformations import BCEConformations
47 changes: 47 additions & 0 deletions peleffy/data/ligands/trimethylglycine_moved.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
REMARK 4 COMPLIES WITH FORMAT V. 3.0, 1-DEC-2006
REMARK 888
REMARK 888 WRITTEN BY MAESTRO (A PRODUCT OF SCHRODINGER, LLC)
TITLE trimethylglycine_2
MODEL 1
HETATM 1 C1 UNL 1 0.028 -1.392 0.008 1.00 0.00 C
HETATM 2 N1 UNL 1 -0.273 0.003 -0.010 1.00 0.00 N1+
HETATM 3 C2 UNL 1 -0.985 0.255 -1.275 1.00 0.00 C
HETATM 4 C3 UNL 1 -1.213 0.331 1.044 1.00 0.00 C
HETATM 5 C4 UNL 1 0.855 0.875 -0.041 1.00 0.00 C
HETATM 6 C5 UNL 1 2.061 0.381 0.593 1.00 0.00 C
HETATM 7 O1 UNL 1 3.096 1.084 0.610 1.00 0.00 O
HETATM 8 O2 UNL 1 2.145 -0.855 1.201 1.00 0.00 O1-
HETATM 9 H1 UNL 1 0.016 -1.836 -1.009 1.00 0.00 H
HETATM 10 H2 UNL 1 -0.799 -1.910 0.570 1.00 0.00 H
HETATM 11 H3 UNL 1 0.958 -1.673 0.486 1.00 0.00 H
HETATM 12 H4 UNL 1 -0.224 0.185 -2.088 1.00 0.00 H
HETATM 13 H5 UNL 1 -1.834 -0.417 -1.403 1.00 0.00 H
HETATM 14 H6 UNL 1 -1.314 1.314 -1.267 1.00 0.00 H
HETATM 15 H7 UNL 1 -2.179 0.700 0.673 1.00 0.00 H
HETATM 16 H8 UNL 1 -1.429 -0.586 1.636 1.00 0.00 H
HETATM 17 H9 UNL 1 -0.809 1.096 1.743 1.00 0.00 H
HETATM 18 H10 UNL 1 1.028 1.180 -1.092 1.00 0.00 H
HETATM 19 H11 UNL 1 0.525 1.823 0.481 1.00 0.00 H
CONECT 1 2 9 10 11
CONECT 2 1 3 4 5
CONECT 3 2 12 13 14
CONECT 4 2 15 16 17
CONECT 5 2 6 18 19
CONECT 6 5 7 8
CONECT 6 7
CONECT 7 6
CONECT 7 6
CONECT 8 6
CONECT 9 1
CONECT 10 1
CONECT 11 1
CONECT 12 3
CONECT 13 3
CONECT 14 3
CONECT 15 4
CONECT 16 4
CONECT 17 4
CONECT 18 5
CONECT 19 5
ENDMDL
END
12 changes: 12 additions & 0 deletions peleffy/data/parameters/ETH.conformation
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
* CONFORMATION LIBRARY FILE
* File generated with peleffy-1.0.0+164.g06140f6
* File: ../../PELEPipelineSanja/4S0V-LIG/CLUSTERS/CL1/cluster1.min.imaged.pdb
UNK 6 1
_C1_ 0.000000 0.000000 0.000000
_C2_ -1.305000 0.181000 -0.014000
_H1_ 0.720000 0.822000 0.042000
_H2_ 0.448000 -0.988000 -0.029000
_H3_ -1.730000 1.171000 0.015000
_H4_ -1.936000 -0.687000 -0.056000
ENDCONFORMATION
END
10 changes: 10 additions & 0 deletions peleffy/data/parameters/ETH.dihedral
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
* DIHEDRAL LIBRARY FILE
* File generated with peleffy-1.0.0+164.g06140f6
* File: ../../PELEPipelineSanja/4S0V-LIG/CLUSTERS/CL1/cluster1.min.imaged.pdb
UNK 4 1
_H1_ _C1_ _C2_ _H3_ 0.001146
_H1_ _C1_ _C2_ _H4_ -3.141280
_H2_ _C1_ _C2_ _H3_ -3.141120
_H2_ _C1_ _C2_ _H4_ -0.000361
ENDDIHEDRALS
END
31 changes: 26 additions & 5 deletions peleffy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ def parse_args(args):
parser.add_argument("-o", "--output", metavar="PATH",
help="Output path. Default is the current working "
+ "directory")
parser.add_argument("--conformations_info_path", default=None, type=str,
help="Path to the folder containing the BCE output"
+ " used to collect conformations for PELE")
parser.add_argument('--with_solvent', dest='with_solvent',
help="Generate solvent parameters for OBC",
action='store_true')
Expand Down Expand Up @@ -99,8 +102,8 @@ def run_peleffy(pdb_file,
charge_method=DEFAULT_CHARGE_METHOD,
charges_from_file=None,
exclude_terminal_rotamers=True,
output=None, with_solvent=False,
as_datalocal=False):
output=None, with_solvent=False, as_datalocal=False,
conformation_path=None):
"""
It runs peleffy.

Expand Down Expand Up @@ -128,6 +131,10 @@ def run_peleffy(pdb_file,
as_datalocal : bool
Whether to save output files following PELE's DataLocal hierarchy or
not
conformation_path: str
Path to the BCE server outupt to use to extract dihedral angles
dihedral_mode: str
Select what kind of dihedrals to extract (all or only flexible)
"""
if charges_from_file is not None:
charge_method_str = 'file\n' \
Expand Down Expand Up @@ -156,6 +163,7 @@ def run_peleffy(pdb_file,
from peleffy.topology import Molecule
from peleffy.template import Impact
from peleffy.solvent import OBC2
from peleffy.BCEConformations import BCEConformations
from peleffy.forcefield import ForceFieldSelector
from peleffy.topology import Topology
from peleffy.utils import parse_charges_from_mae
Expand All @@ -175,8 +183,10 @@ def run_peleffy(pdb_file,
output_path=output,
as_datalocal=as_datalocal)

rotamer_library = peleffy.topology.RotamerLibrary(molecule)
rotamer_library.to_file(output_handler.get_rotamer_library_path())
if conformation_path is None:
# if conformation_path is set, we don't want a rotamer library
rotamer_library = peleffy.topology.RotamerLibrary(molecule)
rotamer_library.to_file(output_handler.get_rotamer_library_path())

# Parameterize molecule with the selected force field
log.info(' - Parameterizing molecule')
Expand Down Expand Up @@ -205,9 +215,19 @@ def run_peleffy(pdb_file,
solvent = OBC2(topology)
solvent.to_file(output_handler.get_solvent_template_path())

if conformation_path is not None:
previous_level = log.get_level()
conformations = BCEConformations(topology, conformation_path)
conformations.calculate()
conformations.save(output_handler.get_conformation_library_path())
log.set_level(previous_level)

log.info(' - All files were generated successfully:')
log.info(' - {}'.format(output_handler.get_rotamer_library_path()))
if conformation_path is None:
log.info(' - {}'.format(output_handler.get_rotamer_library_path()))
log.info(' - {}'.format(output_handler.get_impact_template_path()))
if conformation_path is not None:
log.info(' - {}'.format(output_handler.get_conformation_library_path()))
if with_solvent:
log.info(' - {}'.format(output_handler.get_solvent_template_path()))

Expand Down Expand Up @@ -256,6 +276,7 @@ def main(args):
output=args.output,
with_solvent=args.with_solvent,
as_datalocal=args.as_datalocal,
conformation_path=args.conformations_info_path,
charges_from_file=args.charges_from_file)


Expand Down
64 changes: 64 additions & 0 deletions peleffy/tests/test_BCEConformations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""
This module contains tests that check that the BCEConformations module.
"""

import numpy as np
import pytest

def build_mock_BCEConformations(pdb_file, ffld_file):
from peleffy.topology import Molecule
from peleffy.BCEConformations import BCEConformations
from peleffy.forcefield import ForceFieldSelector
from peleffy.topology import Topology
from .utils import parameterize_opls2005

molecule = Molecule(pdb_file)
ff_selector = ForceFieldSelector()
forcefield = ff_selector.get_by_name("opls2005")
parameters = parameterize_opls2005(forcefield, molecule, ffld_file, charge_method="opls2005")
topology = Topology(molecule, parameters)
return BCEConformations(topology, "")

class TestBCEConformations(object):
"""BCEConformations test."""

from peleffy.BCEConformations import BCEConformations

def test_calculate_cluster_offsets(self):
from peleffy.utils import get_data_file_path

pdb_path = get_data_file_path('ligands/ethylene.pdb')
ffld_path = get_data_file_path("tests/ETL_ffld_output.txt")
golden_offsets = [["_C1_", 0.0, 0.0, 0.0],
["_C2_", -1.305, 0.181, -0.014],
["_H1_", 0.72, 0.822, 0.042],
["_H2_", 0.448, -0.988, -0.029],
["_H3_", -1.73, 1.171, 0.015],
["_H4_", -1.936, -0.687, -0.056]]
bce_obj = build_mock_BCEConformations(pdb_path, ffld_path)
bce_obj.calculate_cluster_offsets(pdb_path)
for dih1, dih2 in zip(bce_obj.conformations_library[pdb_path], golden_offsets):
assert dih1[0] == dih2[0]
np.testing.assert_almost_equal(dih1[1:], dih2[1:], decimal=3)

def test_write_dihedral_library(self):
import os
import tempfile
from peleffy.utils import get_data_file_path, temporary_cd

pdb_path = get_data_file_path('ligands/ethylene.pdb')
ffld_path = get_data_file_path("tests/ETL_ffld_output.txt")
bce_obj = build_mock_BCEConformations(pdb_path, ffld_path)
bce_obj.calculate_cluster_offsets(pdb_path)
calculated_lines = []
with tempfile.TemporaryDirectory() as tmpdir:
with temporary_cd(tmpdir):
bce_obj.save(os.path.join(tmpdir, "ETH.conformation"))
with open(os.path.join(tmpdir, "ETH.conformation")) as f:
calculated_lines = f.readlines()
calculated_lines = calculated_lines[3:]
golden_conformation_library_path = get_data_file_path('parameters/ETH.conformation')
with open(golden_conformation_library_path) as f:
golden_lines = f.readlines()[3:]

assert golden_lines == calculated_lines
1 change: 1 addition & 0 deletions peleffy/tests/test_integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def test_modules(self):
from peleffy import template
from peleffy import topology
from peleffy import utils
from peleffy import BCEConformations

except ImportError as e:
raise AssertionError('The following peleffy module is missing: '
Expand Down
Loading