Skip to content

Commit

Permalink
amino3to1 index improvement (#25)
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed May 6, 2017
1 parent fb1296f commit 2bc018a
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 2 deletions.
2 changes: 1 addition & 1 deletion biopandas/pdb/pandas_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ def amino3to1(self, record='ATOM',
cmp = 'placeholder'
indices = []

for num, ind in zip(tmp['residue_number'], tmp.index):
for num, ind in zip(tmp['residue_number'], np.arange(tmp.shape[0])):
if num != cmp:
indices.append(ind)
cmp = num
Expand Down
33 changes: 33 additions & 0 deletions biopandas/pdb/tests/test_amino3to1.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Project Website: http://rasbt.github.io/biopandas/
# Code Repository: https://github.com/rasbt/biopandas

import numpy as np
from biopandas.pdb import PandasPdb
import os

Expand Down Expand Up @@ -37,6 +38,38 @@ def test_defaults():
assert expect_res == got_res


def test_sameindex():
    """Check ``amino3to1`` when all 'ATOM' rows share one index label.

    The 'ATOM' DataFrame index is overwritten with zeros, so every row
    carries the same (duplicate) index value. The translation must still
    report chain 'A' for every residue and the expected 1-letter sequence.
    """
    pdb_path = os.path.join(os.path.dirname(__file__), 'data',
                            '1t48_995.pdb')
    ppdb = PandasPdb()
    ppdb.read_pdb(pdb_path)
    # NOTE(review): looks like a leftover debugging print -- confirm
    # before removing.
    print(ppdb)

    # Replace the index with all zeros so every index label is a duplicate.
    n_atoms = ppdb.df['ATOM'].shape[0]
    ppdb.df['ATOM'].index = np.zeros(n_atoms, dtype=int)

    # Expected 1-letter codes, nine residues per chunk.
    expect_res = list('MEMEKEFEQ'
                      'IDKSGSWAA'
                      'IYQDIRHEA'
                      'SDFPCRVAK'
                      'LPKNKNRNR'
                      'YRDVSPFDH'
                      'SRIKLHQED'
                      'NDYINASLI'
                      'KMEEAQRSY'
                      'ILTQGPLPN'
                      'TCGHFWEMV'
                      'WEQKSRGVV'
                      'MLNRVMEKG'
                      'SLK')

    translated = ppdb.amino3to1()
    expect_chain = ['A'] * translated.shape[0]
    got_chain = list(translated['chain_id'].values)
    got_res = list(translated['residue_name'].values)

    assert expect_chain == got_chain
    assert expect_res == got_res


def test_multichain():
TESTDATA_5mtn = os.path.join(os.path.dirname(__file__),
'data', '5mtn_multichain.pdb')
Expand Down
3 changes: 2 additions & 1 deletion docs/sources/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ The CHANGELOG for the current development version is available at
##### Changes

- The `amino3to1` method of `biopandas.pdb.PandasPdb` objects now returns a pandas `DataFrame` instead of a pandas `Series` object. The returned data frame has two columns, `'chain_id'` and `'residue_name'`, where the former contains the chain ID of the amino acid and the latter contains the 1-letter amino acid code, respectively.
- Significant speed improvements of the `distance` method of both `PandasPdb` and `PandasMol2` (now about 300 percent faster than previously)
- Significant speed improvements of the `distance` method of both `PandasPdb` and `PandasMol2` (now about 300 percent faster than previously).

##### Bug Fixes

- The `amino3to1` method of `biopandas.pdb.PandasPdb` objects now handles multi-chain proteins correctly.
- The `amino3to1` method of `biopandas.pdb.PandasPdb` objects now also works as expected if the `'ATOM'` entry DataFrame contains disordered DataFrame indices or duplicate DataFrame index values.



Expand Down

0 comments on commit 2bc018a

Please sign in to comment.