allow specification of records in get method (#43)

BioPandas · Jul 28, 2017 · 901ec3f · 901ec3f
1 parent e887b91
commit 901ec3f
Show file tree

Hide file tree

Showing 4 changed files with 18 additions and 11 deletions.
diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
@@ -104,7 +104,7 @@ def fetch_pdb(self, pdb_code):
         self._df = self._construct_df(pdb_lines=self.pdb_text.splitlines(True))
         return self
 
-    def get(self, s, df=None, invert=False):
+    def get(self, s, df=None, invert=False, records=('ATOM', 'HETATM')):
         """Filter PDB DataFrames by properties
 
         Parameters
@@ -120,6 +120,11 @@ def get(self, s, df=None, invert=False):
             Inverts the search query. For example if s='hydrogen' and
             invert=True, all but hydrogen entries are returned.
 
+        records : iterable, default: ('ATOM', 'HETATM')
+            Specify which record sections to consider. For example, to consider
+            both protein and ligand atoms, set `records=('ATOM', 'HETATM')`.
+            This setting is ignored if `df` is not set to None.
+
         Returns
         --------
         df : pandas.DataFrame
@@ -131,15 +136,15 @@ def get(self, s, df=None, invert=False):
         if s not in self._get_dict.keys():
             raise AttributeError('s must be in %s' % self._get_dict.keys())
         if not df:
-            df = self._df['ATOM']
+            df = pd.concat(objs=[self.df[i] for i in records])
         return self._get_dict[s](df, invert=invert)
 
-    def impute_element(self, sections=('ATOM', 'HETATM'), inplace=False):
+    def impute_element(self, records=('ATOM', 'HETATM'), inplace=False):
         """Impute element_symbol from atom_name section.
 
         Parameters
         ----------
-        sections : iterable, default: ('ATOM', 'HETATM')
+        records : iterable, default: ('ATOM', 'HETATM')
             Coordinate sections for which the element symbols should be
             imputed.
 
@@ -159,7 +164,7 @@ def impute_element(self, sections=('ATOM', 'HETATM'), inplace=False):
             for d in self.df:
                 t[d] = self.df[d].copy()
 
-        for sec in sections:
+        for sec in records:
             t[sec]['element_symbol'] = \
                 t[sec][['atom_name', 'element_symbol']].\
                 apply(lambda x: x[0][1]

diff --git a/biopandas/pdb/tests/test_impute.py b/biopandas/pdb/tests/test_impute.py
@@ -19,15 +19,15 @@
 
 
 def test_impute_hetatm():
-    new = ppdb.impute_element(sections=['HETATM'])
+    new = ppdb.impute_element(records=['HETATM'])
     assert new['HETATM']['element_symbol'][1] == 'N'
     assert new['HETATM']['element_symbol'][10] == 'O'
     assert new['ATOM']['element_symbol'][1] == ''
     assert new['ATOM']['element_symbol'][10] == ''
 
 
 def test_impute_atom():
-    new = ppdb.impute_element(sections=['ATOM'])
+    new = ppdb.impute_element(records=['ATOM'])
     assert new['ATOM']['element_symbol'][1] == 'C'
     assert new['ATOM']['element_symbol'][10] == 'C'
     assert new['HETATM']['element_symbol'][1] == ''

diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py
@@ -141,17 +141,17 @@ def test_get_df():
     shape = ppdb.get('c-alpha').shape
     assert shape == (174, 21), shape
 
-    shape = ppdb.get('hydrogen', invert=True).shape
+    shape = ppdb.get('hydrogen', invert=True, records=('ATOM',)).shape
     assert shape == (1330, 21), shape
 
     shape = ppdb.get('hydrogen').shape
     assert shape == (0, 21), shape
 
-    shape = ppdb.get('main chain').shape
+    shape = ppdb.get('main chain', records=('ATOM',)).shape
     assert shape == (696, 21), shape
 
-    shape = ppdb.get('heavy').shape
+    shape = ppdb.get('heavy', records=('ATOM',)).shape
     assert shape == (1330, 21), shape
 
-    shape = ppdb.get('carbon').shape
+    shape = ppdb.get('carbon', records=('ATOM',)).shape
     assert shape == (473, 21), shape
diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -17,6 +17,8 @@ The CHANGELOG for the current development version is available at
 ##### Changes
 
 - `PandasPdb.distance` and `PandasMol2.distance` now accept external `DataFrames` to allow for more efficient distance computations on smaller `DataFrames` if desired. 
+- `PandasPdb.get(...)` now supports external data frames and lets the user specify the record sections to be considered (e.g., `records=('ATOM', 'HETATM')` to include both protein and ligand in a query). It now also defaults to `records=('ATOM', 'HETATM')` for consistency with the impute method.
+- The `sections` parameter of `PandasPdb.impute_element(...)` was renamed to `records` for API consistency.
 
 ##### Bug Fixes