Skip to content

Commit

Permalink
Merge pull request #131 from rigdenlab/changes
Browse files Browse the repository at this point in the history
Minor changes and bug fixes
  • Loading branch information
hlasimpk committed Aug 7, 2020
2 parents 9aa65d4 + 2d55a93 commit bdde11c
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 26 deletions.
32 changes: 22 additions & 10 deletions simbad/command_line/simbad_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@
from simbad.util.pdb_util import PdbStructure

if sys.version_info.major < 3:
from urllib2 import urlopen
from urllib2 import urlopen, HTTPError
else:
from urllib.request import urlopen
HTTPError = urllib.error.HTTPError


logger = None

Expand Down Expand Up @@ -75,13 +77,14 @@ def is_valid_db_location(database):
"""Validate permissions for a database"""
return os.access(os.path.dirname(os.path.abspath(database)), os.W_OK)


def is_readable_file(file):
"""Validate that the PDB file is readable"""
sol_calc = simbad.util.matthews_prob.SolventContent(17000)
try:
pdb_struct = simbad.util.pdb_util.PdbStructure()
pdb_struct.from_file(file)
sol_calc.calculate_from_struct(pdb_struct)
pdb_struct = PdbStructure.from_file(file)
# Call functions that require file to be properly read
pdb_struct.molecular_weight
pdb_struct.nres
except Exception:
return False
return True
Expand All @@ -98,11 +101,15 @@ def download_morda():
"""
logger.info("Downloading MoRDa database from CCP4")
url = "http://www.ccp4.ac.uk/morda/MoRDa_DB.tar.gz"
url_legacy = "http://legacy.ccp4.ac.uk/morda/MoRDa_DB.tar.gz"
local_db = os.path.basename(url)
# http://stackoverflow.com/a/34831866/3046533
chunk_size = 1 << 20
with open(local_db, "wb") as f_out:
query = urlopen(url)
try:
query = urlopen(url)
except HTTPError:
query = urlopen(url_legacy)
while True:
chunk = query.read(chunk_size)
if not chunk:
Expand Down Expand Up @@ -248,6 +255,8 @@ def create_contaminant_db(database, add_morda_domains, nproc=2, submit_qtype=Non
except:
msg = "Error downloading contaminant directory from https://github.com/rigdenlab/SIMBAD/trunk/static/contaminants"
raise RuntimeError(msg)
if os.path.isdir(database):
shutil.rmtree(database)
shutil.move(os.path.join(os.getcwd(), 'contaminants'), database)
return

Expand Down Expand Up @@ -357,10 +366,10 @@ def create_contaminant_db(database, add_morda_domains, nproc=2, submit_qtype=Non
success_func=None)

for output, final in files:
if os.path.isfile(output):
if os.path.isfile(output) and is_readable_file(output):
simbad.db.convert_pdb_to_dat(output, final)
else:
logger.warning("File missing: {}".format(output))
logger.critical("Problem with file: %s", output)

for d in tmps:
shutil.rmtree(d)
Expand Down Expand Up @@ -389,7 +398,7 @@ def create_morda_db(database, nproc=2, submit_qtype=None, submit_queue=False, ch
The queue to submit to on the cluster
chunk_size : int, optional
The number of jobs to submit at the same time [default: 5000]
Raises
------
RuntimeError
Expand Down Expand Up @@ -715,7 +724,10 @@ def create_db_custom(custom_db, database):
os.makedirs(sub_dir)

for output, final in files:
simbad.db.convert_pdb_to_dat(output, final)
if os.path.isfile(output) and is_readable_file(output):
simbad.db.convert_pdb_to_dat(output, final)
else:
logger.critical("Problem with file: %s", output)

validate_compressed_database(database)
leave_timestamp(os.path.join(database, 'simbad_custom.txt'))
Expand Down
24 changes: 14 additions & 10 deletions simbad/util/pdb_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def from_file(cls, input_file):
struct.structure = gemmi.read_structure(input_file)
elif input_file.endswith(".ent.gz"):
struct.structure = gemmi.read_structure(input_file)
struct.assert_structure()
return struct

@classmethod
Expand All @@ -31,8 +32,12 @@ def from_pdb_code(cls, pdb_code):
struct.structure = gemmi.read_pdb_string(content)
else:
raise RuntimeError
struct.assert_structure()
return struct

def assert_structure(self):
assert self.structure[0].count_atom_sites() > 0, "No atoms found in structure"

@staticmethod
def get_pdb_content(pdb_code):
import iotbx.pdb.fetch
Expand All @@ -51,12 +56,12 @@ def get_pdb_content(pdb_code):
def get_sequence_info(self):
chain2data = {}
unique_chains = []
for chain in self.structure[0]: # Just the first model
for chain in self.structure[0]: # Just the first model
id = chain.name
seq = ""
for residue in chain.first_conformer(): # Only use the main conformer
for residue in chain.first_conformer(): # Only use the main conformer
res_info = gemmi.find_tabulated_residue(residue.name)
if res_info.is_amino_acid(): # Only amino acids
if res_info.is_amino_acid(): # Only amino acids
seq += res_info.one_letter_code
if seq not in unique_chains:
chain2data[id] = seq
Expand All @@ -66,18 +71,18 @@ def get_sequence_info(self):
@property
def molecular_weight(self):
mw = 0
hydrogen_atoms = 2 # For each end
hydrogen_atoms = 0
for chain in self.structure[0]: # Just first model
for residue in chain:
res_info = gemmi.find_tabulated_residue(residue.name)
# Many PDB files don't include hydrogens so account for them here
hydrogen_atoms += res_info.hydrogen_count - 2 # Due to peptide bonds
if res_info.is_standard(): # Only count standard amino acids
hydrogen_atoms += res_info.hydrogen_count - 2 # Due to peptide bonds
if res_info.is_standard(): # Only count standard amino acids
for atom in residue:
if atom.is_hydrogen: # Ignore as hydrogens already counted
if atom.is_hydrogen: # Ignore as hydrogens already counted
pass
mw += atom.element.weight * atom.occ
mw += gemmi.Element('H').weight * hydrogen_atoms
mw += gemmi.Element('H').weight * hydrogen_atoms + 2 # Plus 2 for each end
return mw

@property
Expand Down Expand Up @@ -117,7 +122,6 @@ def nres(self):
def keep_first_chain_only(self):
self.select_chain_by_idx(0)


def keep_first_model_only(self):
models = [m.name for m in self.structure]
for model in models[1:]:
Expand Down Expand Up @@ -155,4 +159,4 @@ def save(self, pdbout, remarks=[]):
for remark in remarks:
f_out.write("REMARK %s" % remark + os.linesep)
for line in pdb_string:
f_out.write(line + os.linesep)
f_out.write(line + os.linesep)
2 changes: 1 addition & 1 deletion simbad/util/pyrvapi_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def __init__(self, rvapi_document, webserver_uri, display_gui, logfile, work_dir
pyrvapi.rvapi_flush()

@staticmethod
def init_from_ccp4i2_xml(self, ccp4i2_xml, pyrvapi_dir, share_jsrview, wintitle):
def init_from_ccp4i2_xml(ccp4i2_xml, pyrvapi_dir, share_jsrview, wintitle):
"""This code is largely stolen from Andrew Lebedev"""

# // Document modes
Expand Down
5 changes: 3 additions & 2 deletions simbad/util/tests/test_matthews_prob.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
__date__ = "16 Aug 2017"

import os
import numpy as np
import unittest
from simbad.util import matthews_prob

Expand All @@ -23,7 +24,7 @@ def test_solvent_content(self):

reference_data = 0.48960068050640637

self.assertAlmostEqual(data, reference_data)
self.assertAlmostEqual(np.round(data, 3), np.round(reference_data, 3))

def test_matthews_prob(self):
"""Test case for matthews_prob.MatthewsProbability.calculate_from_file"""
Expand All @@ -35,7 +36,7 @@ def test_matthews_prob(self):

reference_data = (0.48960068050640637, 1)

self.assertAlmostEqual(data[0], reference_data[0])
self.assertAlmostEqual(np.round(data[0], 3), np.round(reference_data[0], 3))
self.assertAlmostEqual(data[1], reference_data[1])


Expand Down
7 changes: 4 additions & 3 deletions simbad/util/tests/test_pdb_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
__date__ = "19 Jan 2018"

import os
import numpy as np
import sys
import unittest

Expand Down Expand Up @@ -49,7 +50,7 @@ def test_molecular_weight_1(self):
data = pdb_struct.molecular_weight
reference_data = 6855.978639999951

self.assertAlmostEqual(data, reference_data)
self.assertAlmostEqual(np.round(data, 0), np.round(reference_data, 0))

def test_molecular_weight_2(self):
"""Test case for PdbStructure.molecular_weight"""
Expand All @@ -59,7 +60,7 @@ def test_molecular_weight_2(self):
data = pdb_struct.molecular_weight
reference_data = 21163.001080955823

self.assertAlmostEqual(data, reference_data)
self.assertAlmostEqual(np.round(data, 0), np.round(reference_data, 0))

def test_molecular_weight_3(self):
"""Test case for PdbStructure.molecular_weight"""
Expand All @@ -69,7 +70,7 @@ def test_molecular_weight_3(self):
data = pdb_struct.molecular_weight
reference_data = 128535.91044924183

self.assertAlmostEqual(data, reference_data)
self.assertAlmostEqual(np.round(data, 0), np.round(reference_data, 0))

@unittest.skipIf('THIS_IS_TRAVIS' in os.environ, "not implemented in Travis CI")
def test_standardise_1(self):
Expand Down

0 comments on commit bdde11c

Please sign in to comment.