Merge pull request #131 from rigdenlab/changes

Minor changes and bug fixes
rigdenlab · Aug 7, 2020 · bdde11c · bdde11c
2 parents 9aa65d4 + 2d55a93
commit bdde11c
Show file tree

Hide file tree

Showing 5 changed files with 44 additions and 26 deletions.
diff --git a/simbad/command_line/simbad_database.py b/simbad/command_line/simbad_database.py
@@ -38,9 +38,11 @@
 from simbad.util.pdb_util import PdbStructure
 
 if sys.version_info.major < 3:
-    from urllib2 import urlopen
+    from urllib2 import urlopen, HTTPError
 else:
     from urllib.request import urlopen
+    HTTPError = urllib.error.HTTPError
+
 
 logger = None
 
@@ -75,13 +77,14 @@ def is_valid_db_location(database):
     """Validate permissions for a database"""
     return os.access(os.path.dirname(os.path.abspath(database)), os.W_OK)
 
+
 def is_readable_file(file):
     """Validate that the PDB file is readable"""
-    sol_calc = simbad.util.matthews_prob.SolventContent(17000)
     try:
-        pdb_struct = simbad.util.pdb_util.PdbStructure()
-        pdb_struct.from_file(file)
-        sol_calc.calculate_from_struct(pdb_struct)
+        pdb_struct = PdbStructure.from_file(file)
+        # Call functions that require file to be properly read
+        pdb_struct.molecular_weight
+        pdb_struct.nres
     except Exception:
         return False
     return True
@@ -98,11 +101,15 @@ def download_morda():
     """
     logger.info("Downloading MoRDa database from CCP4")
     url = "http://www.ccp4.ac.uk/morda/MoRDa_DB.tar.gz"
+    url_legacy =  "http://legacy.ccp4.ac.uk/morda/MoRDa_DB.tar.gz"
     local_db = os.path.basename(url)
     # http://stackoverflow.com/a/34831866/3046533
     chunk_size = 1 << 20
     with open(local_db, "wb") as f_out:
-        query = urlopen(url)
+        try:
+            query = urlopen(url)
+        except HTTPError:
+            query = urlopen(url_legacy)
         while True:
             chunk = query.read(chunk_size)
             if not chunk:
@@ -248,6 +255,8 @@ def create_contaminant_db(database, add_morda_domains, nproc=2, submit_qtype=Non
         except:
             msg = "Error downloading contaminant directory from https://github.com/rigdenlab/SIMBAD/trunk/static/contaminants"
             raise RuntimeError(msg)
+        if os.path.isdir(database):
+            shutil.rmtree(database)
         shutil.move(os.path.join(os.getcwd(), 'contaminants'), database)
         return
 
@@ -357,10 +366,10 @@ def create_contaminant_db(database, add_morda_domains, nproc=2, submit_qtype=Non
                          success_func=None)
 
             for output, final in files:
-                if os.path.isfile(output):
+                if os.path.isfile(output) and is_readable_file(output):
                     simbad.db.convert_pdb_to_dat(output, final)
                 else:
-                    logger.warning("File missing: {}".format(output))
+                    logger.critical("Problem with file: %s", output)
 
             for d in tmps:
                 shutil.rmtree(d)
@@ -389,7 +398,7 @@ def create_morda_db(database, nproc=2, submit_qtype=None, submit_queue=False, ch
        The queue to submit to on the cluster
     chunk_size : int, optional
        The number of jobs to submit at the same time [default: 5000]
-    
+
     Raises
     ------
     RuntimeError
@@ -715,7 +724,10 @@ def create_db_custom(custom_db, database):
         os.makedirs(sub_dir)
 
     for output, final in files:
-        simbad.db.convert_pdb_to_dat(output, final)
+        if os.path.isfile(output) and is_readable_file(output):
+            simbad.db.convert_pdb_to_dat(output, final)
+        else:
+            logger.critical("Problem with file: %s", output)
 
     validate_compressed_database(database)
     leave_timestamp(os.path.join(database, 'simbad_custom.txt'))

diff --git a/simbad/util/pdb_util.py b/simbad/util/pdb_util.py
@@ -21,6 +21,7 @@ def from_file(cls, input_file):
             struct.structure = gemmi.read_structure(input_file)
         elif input_file.endswith(".ent.gz"):
             struct.structure = gemmi.read_structure(input_file)
+        struct.assert_structure()
         return struct
 
     @classmethod
@@ -31,8 +32,12 @@ def from_pdb_code(cls, pdb_code):
             struct.structure = gemmi.read_pdb_string(content)
         else:
             raise RuntimeError
+        struct.assert_structure()
         return struct
 
+    def assert_structure(self):
+        assert self.structure[0].count_atom_sites() > 0, "No atoms found in structure"
+
     @staticmethod
     def get_pdb_content(pdb_code):
         import iotbx.pdb.fetch
@@ -51,12 +56,12 @@ def get_pdb_content(pdb_code):
     def get_sequence_info(self):
         chain2data = {}
         unique_chains = []
-        for chain in self.structure[0]: # Just the first model
+        for chain in self.structure[0]:  # Just the first model
             id = chain.name
             seq = ""
-            for residue in chain.first_conformer(): # Only use the main conformer
+            for residue in chain.first_conformer():  # Only use the main conformer
                 res_info = gemmi.find_tabulated_residue(residue.name)
-                if res_info.is_amino_acid(): # Only amino acids
+                if res_info.is_amino_acid():  # Only amino acids
                     seq += res_info.one_letter_code
             if seq not in unique_chains:
                 chain2data[id] = seq
@@ -66,18 +71,18 @@ def get_sequence_info(self):
     @property
     def molecular_weight(self):
         mw = 0
-        hydrogen_atoms = 2 # For each end
+        hydrogen_atoms = 0
         for chain in self.structure[0]:  # Just first model
             for residue in chain:
                 res_info = gemmi.find_tabulated_residue(residue.name)
                 # Many PDB files don't include hydrogens so account for them here
-                hydrogen_atoms += res_info.hydrogen_count - 2 # Due to peptide bonds
-                if res_info.is_standard(): # Only count standard amino acids
+                hydrogen_atoms += res_info.hydrogen_count - 2  # Due to peptide bonds
+                if res_info.is_standard():  # Only count standard amino acids
                     for atom in residue:
-                        if atom.is_hydrogen: # Ignore as hydrogens already counted
+                        if atom.is_hydrogen:  # Ignore as hydrogens already counted
                             pass
                         mw += atom.element.weight * atom.occ
-        mw += gemmi.Element('H').weight * hydrogen_atoms
+        mw += gemmi.Element('H').weight * hydrogen_atoms + 2  # Plus 2 for each end
         return mw
 
     @property
@@ -117,7 +122,6 @@ def nres(self):
     def keep_first_chain_only(self):
         self.select_chain_by_idx(0)
 
-
     def keep_first_model_only(self):
         models = [m.name for m in self.structure]
         for model in models[1:]:
@@ -155,4 +159,4 @@ def save(self, pdbout, remarks=[]):
             for remark in remarks:
                 f_out.write("REMARK %s" % remark + os.linesep)
             for line in pdb_string:
-                f_out.write(line + os.linesep)
+                f_out.write(line + os.linesep)
diff --git a/simbad/util/pyrvapi_results.py b/simbad/util/pyrvapi_results.py
@@ -159,7 +159,7 @@ def __init__(self, rvapi_document, webserver_uri, display_gui, logfile, work_dir
         pyrvapi.rvapi_flush()
 
     @staticmethod
-    def init_from_ccp4i2_xml(self, ccp4i2_xml, pyrvapi_dir, share_jsrview, wintitle):
+    def init_from_ccp4i2_xml(ccp4i2_xml, pyrvapi_dir, share_jsrview, wintitle):
         """This code is largely stolen from Andrew Lebedev"""
 
         # // Document modes

diff --git a/simbad/util/tests/test_matthews_prob.py b/simbad/util/tests/test_matthews_prob.py
@@ -4,6 +4,7 @@
 __date__ = "16 Aug 2017"
 
 import os
+import numpy as np
 import unittest
 from simbad.util import matthews_prob
 
@@ -23,7 +24,7 @@ def test_solvent_content(self):
 
         reference_data = 0.48960068050640637
 
-        self.assertAlmostEqual(data, reference_data)
+        self.assertAlmostEqual(np.round(data, 3), np.round(reference_data, 3))
 
     def test_matthews_prob(self):
         """Test case for matthews_prob.MatthewsProbability.calculate_from_file"""
@@ -35,7 +36,7 @@ def test_matthews_prob(self):
 
         reference_data = (0.48960068050640637, 1)
 
-        self.assertAlmostEqual(data[0], reference_data[0])
+        self.assertAlmostEqual(np.round(data[0], 3), np.round(reference_data[0], 3))
         self.assertAlmostEqual(data[1], reference_data[1])
 
 

diff --git a/simbad/util/tests/test_pdb_util.py b/simbad/util/tests/test_pdb_util.py
@@ -4,6 +4,7 @@
 __date__ = "19 Jan 2018"
 
 import os
+import numpy as np
 import sys
 import unittest
 
@@ -49,7 +50,7 @@ def test_molecular_weight_1(self):
         data = pdb_struct.molecular_weight
         reference_data = 6855.978639999951
 
-        self.assertAlmostEqual(data, reference_data)
+        self.assertAlmostEqual(np.round(data, 0), np.round(reference_data, 0))
 
     def test_molecular_weight_2(self):
         """Test case for PdbStructure.molecular_weight"""
@@ -59,7 +60,7 @@ def test_molecular_weight_2(self):
         data = pdb_struct.molecular_weight
         reference_data = 21163.001080955823
 
-        self.assertAlmostEqual(data, reference_data)
+        self.assertAlmostEqual(np.round(data, 0), np.round(reference_data, 0))
 
     def test_molecular_weight_3(self):
         """Test case for PdbStructure.molecular_weight"""
@@ -69,7 +70,7 @@ def test_molecular_weight_3(self):
         data = pdb_struct.molecular_weight
         reference_data = 128535.91044924183
 
-        self.assertAlmostEqual(data, reference_data)
+        self.assertAlmostEqual(np.round(data, 0), np.round(reference_data, 0))
 
     @unittest.skipIf('THIS_IS_TRAVIS' in os.environ, "not implemented in Travis CI")
     def test_standardise_1(self):