Skip to content

Commit

Permalink
allow specification of records in get method (#43)
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed Jul 28, 2017
1 parent e887b91 commit 901ec3f
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 11 deletions.
15 changes: 10 additions & 5 deletions biopandas/pdb/pandas_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def fetch_pdb(self, pdb_code):
self._df = self._construct_df(pdb_lines=self.pdb_text.splitlines(True))
return self

def get(self, s, df=None, invert=False):
def get(self, s, df=None, invert=False, records=('ATOM', 'HETATM')):
"""Filter PDB DataFrames by properties
Parameters
Expand All @@ -120,6 +120,11 @@ def get(self, s, df=None, invert=False):
Inverts the search query. For example if s='hydrogen' and
invert=True, all but hydrogen entries are returned.
records : iterable, default: ('ATOM', 'HETATM')
Specify which record sections to consider. For example, to consider
both protein and ligand atoms, set `records=('ATOM', 'HETATM')`.
This setting is ignored if a DataFrame is passed via `df` (i.e., `df` is not None).
Returns
--------
df : pandas.DataFrame
Expand All @@ -131,15 +136,15 @@ def get(self, s, df=None, invert=False):
if s not in self._get_dict.keys():
raise AttributeError('s must be in %s' % self._get_dict.keys())
if not df:
df = self._df['ATOM']
df = pd.concat(objs=[self.df[i] for i in records])
return self._get_dict[s](df, invert=invert)

def impute_element(self, sections=('ATOM', 'HETATM'), inplace=False):
def impute_element(self, records=('ATOM', 'HETATM'), inplace=False):
"""Impute element_symbol from atom_name section.
Parameters
----------
sections : iterable, default: ('ATOM', 'HETATM')
records : iterable, default: ('ATOM', 'HETATM')
Coordinate sections for which the element symbols should be
imputed.
Expand All @@ -159,7 +164,7 @@ def impute_element(self, sections=('ATOM', 'HETATM'), inplace=False):
for d in self.df:
t[d] = self.df[d].copy()

for sec in sections:
for sec in records:
t[sec]['element_symbol'] = \
t[sec][['atom_name', 'element_symbol']].\
apply(lambda x: x[0][1]
Expand Down
4 changes: 2 additions & 2 deletions biopandas/pdb/tests/test_impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@


def test_impute_hetatm():
new = ppdb.impute_element(sections=['HETATM'])
new = ppdb.impute_element(records=['HETATM'])
assert new['HETATM']['element_symbol'][1] == 'N'
assert new['HETATM']['element_symbol'][10] == 'O'
assert new['ATOM']['element_symbol'][1] == ''
assert new['ATOM']['element_symbol'][10] == ''


def test_impute_atom():
new = ppdb.impute_element(sections=['ATOM'])
new = ppdb.impute_element(records=['ATOM'])
assert new['ATOM']['element_symbol'][1] == 'C'
assert new['ATOM']['element_symbol'][10] == 'C'
assert new['HETATM']['element_symbol'][1] == ''
Expand Down
8 changes: 4 additions & 4 deletions biopandas/pdb/tests/test_read_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,17 +141,17 @@ def test_get_df():
shape = ppdb.get('c-alpha').shape
assert shape == (174, 21), shape

shape = ppdb.get('hydrogen', invert=True).shape
shape = ppdb.get('hydrogen', invert=True, records=('ATOM',)).shape
assert shape == (1330, 21), shape

shape = ppdb.get('hydrogen').shape
assert shape == (0, 21), shape

shape = ppdb.get('main chain').shape
shape = ppdb.get('main chain', records=('ATOM',)).shape
assert shape == (696, 21), shape

shape = ppdb.get('heavy').shape
shape = ppdb.get('heavy', records=('ATOM',)).shape
assert shape == (1330, 21), shape

shape = ppdb.get('carbon').shape
shape = ppdb.get('carbon', records=('ATOM',)).shape
assert shape == (473, 21), shape
2 changes: 2 additions & 0 deletions docs/sources/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ The CHANGELOG for the current development version is available at
##### Changes

- `PandasPdb.distance` and `PandasMol2.distance` now accept external `DataFrames` to allow for more efficient distance computations on smaller `DataFrames` if desired.
- `PandasPdb.get(...)` now supports external data frames and lets the user specify the record sections to be considered (e.g., `records=('ATOM', 'HETATM')` to include both protein and ligand in a query). It now also defaults to `records=('ATOM', 'HETATM')` for consistency with the impute method.
- The `sections` parameter of `PandasPdb.impute_element(...)` was renamed to `records` for API consistency.

##### Bug Fixes

Expand Down

0 comments on commit 901ec3f

Please sign in to comment.