Skip to content

Commit

Permalink
Merge pull request #284 from tovrstra/fix-pdb-cl
Browse files Browse the repository at this point in the history
Fix PDB load_one for Cl atoms
  • Loading branch information
tovrstra committed Jun 12, 2023
2 parents 0311b86 + 5d3b498 commit ca6113b
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 34 deletions.
56 changes: 45 additions & 11 deletions iodata/formats/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,36 @@
PATTERNS = ['*.pdb']


def _parse_pdb_atom_line(line):
def _parse_pdb_atom_line(line, lit):
"""Parse an ATOM or HETATM line from a PDB file.
Parameters
----------
line
A string with a single ATOM or HETATM line.
lit
The line iterator which read the line, used for generating warnings when needed.
Returns
-------
atnum
Atomic number.
atname
The atom name.
resname
The residua name.
chainid
The chain ID.
resnum
The residue number.
atcoord
Cartesian coordinates of the atomic nucleus.
occupancy
The occupancy (usually from the XRD analysis).
bfactor
The temperature factor (usually from the XRD analysis).
"""
# Overview of ATOM records
# COLUMNS DATA TYPE FIELD DEFINITION
# -------------------------------------------------------------------------------------
Expand All @@ -62,16 +91,21 @@ def _parse_pdb_atom_line(line):
# 79 - 80 LString(2) charge Charge on the atom.

# Get element symbol from position 77:78 in pdb format
words = line[76:78].split()
if not words:
symbol = line[76:78].strip()
if len(symbol) > 0:
atnum = sym2num.get(symbol)
else:
# If not present, guess it from position 13:16 (atom name)
words = line[12:16].split()
# assign atomic number
symbol = words[0].title()
atnum = sym2num.get(symbol, sym2num.get(symbol[0], None))
atname = line[12:16].strip()
atnum = sym2num.get(atname, sym2num.get(atname[:2].title(), sym2num.get(atname[0], None)))
lit.warn("Using the atom name in the PDB file to guess the chemical element.")
if atnum is None:
atnum = 0
lit.warn(f"Failed to determine the atomic number. atname='{atname}' symbol='{symbol}'")

# atom name, residue name, chain id, & residue sequence number
attype = line[12:16].strip()
restype = line[17:20].strip()
atname = line[12:16].strip()
resname = line[17:20].strip()
chainid = line[21]
resnum = int(line[22:26])
# add x, y, and z
Expand All @@ -83,7 +117,7 @@ def _parse_pdb_atom_line(line):
# get occupancies & temperature factor
occupancy = float(line[54:60])
bfactor = float(line[60:66])
return atnum, attype, restype, chainid, resnum, atcoord, occupancy, bfactor
return atnum, atname, resname, chainid, resnum, atcoord, occupancy, bfactor


def _parse_pdb_conect_line(line):
Expand Down Expand Up @@ -133,7 +167,7 @@ def load_one(lit: LineIterator) -> dict: # pylint: disable=too-many-branches, t
compnd_lines.append(line[10:].strip())
if line.startswith("ATOM") or line.startswith("HETATM"):
(atnum, attype, restype, chainid, resnum, atcoord, occupancy,
bfactor) = _parse_pdb_atom_line(line)
bfactor) = _parse_pdb_atom_line(line, lit)
atnums.append(atnum)
attypes.append(attype)
restypes.append(restype)
Expand Down
88 changes: 88 additions & 0 deletions iodata/test/data/indomethacin-dimer.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
TITLE indomethacin_indomethacin
REMARK THIS IS A SIMULATION BOX
CRYST1 3000.000 3000.000 3000.000 90.00 90.00 90.00 P 1 1
MODEL 1
ATOM 1 C1 XXX 1 1002.3891002.1441003.016 1.00 0.00
ATOM 2 C2 XXX 1 1001.0891001.9451002.264 1.00 0.00
ATOM 3 C3 XXX 1 999.8401002.0381002.867 1.00 0.00
ATOM 4 C4 XXX 1 998.9231001.8711001.764 1.00 0.00
ATOM 5 C5 XXX 1 999.6481001.6901000.605 1.00 0.00
ATOM 6 N6 XXX 1 1001.0471001.7681000.868 1.00 0.00
ATOM 7 C7 XXX 1 1002.1311001.735 999.982 1.00 0.00
ATOM 8 O8 XXX 1 1003.2631001.4431000.335 1.00 0.00
ATOM 9 C9 XXX 1 1001.8731002.152 998.556 1.00 0.00
ATOM 10 C10 XXX 1 1002.4821001.445 997.502 1.00 0.00
ATOM 11 C11 XXX 1 1002.2311001.801 996.167 1.00 0.00
ATOM 12 C12 XXX 1 1001.3961002.889 995.878 1.00 0.00
ATOM 13 C13 XXX 1 1000.8251003.634 996.921 1.00 0.00
ATOM 14 C14 XXX 1 1001.0701003.275 998.257 1.00 0.00
ATOM 15 CL15 XXX 1 1001.0771003.328 994.215 1.00 0.00
ATOM 16 C16 XXX 1 998.9921001.379 999.401 1.00 0.00
ATOM 17 C17 XXX 1 997.5841001.355 999.390 1.00 0.00
ATOM 18 C18 XXX 1 996.8471001.5931000.568 1.00 0.00
ATOM 19 C19 XXX 1 997.5241001.8391001.783 1.00 0.00
ATOM 20 O20 XXX 1 995.4701001.5461000.491 1.00 0.00
ATOM 21 C21 XXX 1 994.6241001.7061001.643 1.00 0.00
ATOM 22 C22 XXX 1 999.5591002.2611004.338 1.00 0.00
ATOM 23 C23 XXX 1 999.6311003.7241004.772 1.00 0.00
ATOM 24 O24 XXX 1 1000.6441004.3881004.825 1.00 0.00
ATOM 25 O25 XXX 1 998.4921004.2351005.231 1.00 0.00
ATOM 26 H26 XXX 1 1003.0141001.2471002.966 1.00 0.00
ATOM 27 H27 XXX 1 1002.2421002.3781004.075 1.00 0.00
ATOM 28 H28 XXX 1 1002.9601002.9751002.585 1.00 0.00
ATOM 29 H29 XXX 1 1003.1441000.613 997.724 1.00 0.00
ATOM 30 H30 XXX 1 1002.6821001.232 995.359 1.00 0.00
ATOM 31 H31 XXX 1 1000.1901004.486 996.692 1.00 0.00
ATOM 32 H32 XXX 1 1000.6211003.856 999.061 1.00 0.00
ATOM 33 H33 XXX 1 999.5391001.142 998.495 1.00 0.00
ATOM 34 H34 XXX 1 997.0531001.139 998.468 1.00 0.00
ATOM 35 H35 XXX 1 996.9941001.9891002.715 1.00 0.00
ATOM 36 H36 XXX 1 994.7821002.6911002.100 1.00 0.00
ATOM 37 H37 XXX 1 994.8381000.9261002.384 1.00 0.00
ATOM 38 H38 XXX 1 993.5711001.6251001.346 1.00 0.00
ATOM 39 H39 XXX 1 1000.2501001.6871004.964 1.00 0.00
ATOM 40 H40 XXX 1 998.5591001.8821004.580 1.00 0.00
ATOM 41 H41 XXX 1 998.8251005.1191005.462 1.00 0.00
ATOM 42 C1 XXX 2 1000.227 998.3851002.728 1.00 0.00
ATOM 43 C2 XXX 2 1000.499 998.3121001.240 1.00 0.00
ATOM 44 C3 XXX 2 1001.780 998.2301000.707 1.00 0.00
ATOM 45 C4 XXX 2 1001.547 998.162 999.286 1.00 0.00
ATOM 46 C5 XXX 2 1000.190 998.227 999.053 1.00 0.00
ATOM 47 N6 XXX 2 999.467 998.2451000.284 1.00 0.00
ATOM 48 C7 XXX 2 998.090 998.1271000.528 1.00 0.00
ATOM 49 O8 XXX 2 997.577 998.3871001.604 1.00 0.00
ATOM 50 C9 XXX 2 997.233 997.574 999.418 1.00 0.00
ATOM 51 C10 XXX 2 996.008 998.194 999.106 1.00 0.00
ATOM 52 C11 XXX 2 995.197 997.685 998.078 1.00 0.00
ATOM 53 C12 XXX 2 995.597 996.541 997.372 1.00 0.00
ATOM 54 C13 XXX 2 996.796 995.893 997.701 1.00 0.00
ATOM 55 C14 XXX 2 997.608 996.399 998.730 1.00 0.00
ATOM 56 CL15 XXX 2 994.588 995.910 996.088 1.00 0.00
ATOM 57 C16 XXX 2 999.708 998.359 997.738 1.00 0.00
ATOM 58 C17 XXX 2 1000.633 998.316 996.675 1.00 0.00
ATOM 59 C18 XXX 2 1002.013 998.164 996.922 1.00 0.00
ATOM 60 C19 XXX 2 1002.484 998.098 998.251 1.00 0.00
ATOM 61 O20 XXX 2 1002.867 998.134 995.839 1.00 0.00
ATOM 62 C21 XXX 2 1004.285 997.924 995.971 1.00 0.00
ATOM 63 C22 XXX 2 1003.084 998.2691001.473 1.00 0.00
ATOM 64 C23 XXX 2 1003.474 996.9371002.105 1.00 0.00
ATOM 65 O24 XXX 2 1003.050 996.5131003.158 1.00 0.00
ATOM 66 O25 XXX 2 1004.401 996.2461001.445 1.00 0.00
ATOM 67 H26 XXX 2 999.596 999.2491002.966 1.00 0.00
ATOM 68 H27 XXX 2 1001.137 998.4801003.329 1.00 0.00
ATOM 69 H28 XXX 2 999.705 997.4841003.070 1.00 0.00
ATOM 70 H29 XXX 2 995.688 999.071 999.665 1.00 0.00
ATOM 71 H30 XXX 2 994.258 998.172 997.832 1.00 0.00
ATOM 72 H31 XXX 2 997.094 994.998 997.161 1.00 0.00
ATOM 73 H32 XXX 2 998.535 995.890 998.986 1.00 0.00
ATOM 74 H33 XXX 2 998.655 998.510 997.524 1.00 0.00
ATOM 75 H34 XXX 2 1000.287 998.404 995.650 1.00 0.00
ATOM 76 H35 XXX 2 1003.540 998.024 998.484 1.00 0.00
ATOM 77 H36 XXX 2 1004.483 996.964 996.465 1.00 0.00
ATOM 78 H37 XXX 2 1004.738 998.732 996.559 1.00 0.00
ATOM 79 H38 XXX 2 1004.753 997.909 994.980 1.00 0.00
ATOM 80 H39 XXX 2 1003.053 999.0221002.265 1.00 0.00
ATOM 81 H40 XXX 2 1003.896 998.5821000.806 1.00 0.00
ATOM 82 H41 XXX 2 1004.458 995.4871002.050 1.00 0.00
TER
ENDMDL
58 changes: 35 additions & 23 deletions iodata/test/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,22 @@ def check_water(mol):
assert_equal(mol.bonds[:, :2], [[0, 1], [1, 2]])


def check_load_dump_consistency(tmpdir, fn):
"""Check if dumping and loading an PDB file results in the same data."""
mol0 = load_one(str(fn))
@pytest.mark.parametrize("fn_base,should_warn", [
("water_single.pdb", False),
("water_single_model.pdb", False),
("ch5plus.pdb", False),
("2luv.pdb", True),
("2bcw.pdb", False),
])
def test_load_dump_consistency(fn_base, should_warn, tmpdir):
with as_file(files('iodata.test.data').joinpath(fn_base)) as fn_pdb:
if should_warn:
with pytest.warns(FileFormatWarning) as record:
mol0 = load_one(str(fn_pdb))
assert len(record) > 1
else:
mol0 = load_one(str(fn_pdb))

# write pdb file in a temporary folder & then read it
fn_tmp = os.path.join(tmpdir, 'test.pdb')
dump_one(mol0, fn_tmp)
Expand All @@ -87,21 +100,10 @@ def check_load_dump_consistency(tmpdir, fn):
assert_equal(mol0.bonds, mol1.bonds)


@pytest.mark.parametrize("fn_base", [
"water_single.pdb",
"water_single_model.pdb",
"ch5plus.pdb",
"2luv.pdb",
"2bcw.pdb",
])
def test_load_dump_consistency(fn_base, tmpdir):
with as_file(files("iodata.test.data").joinpath(fn_base)) as fn_pdb:
check_load_dump_consistency(tmpdir, fn_pdb)

def test_load_dump_xyz_consistency(tmpdir):
with as_file(files("iodata.test.data").joinpath("water.xyz")) as fn_xyz:
mol0 = load_one(str(fn_xyz))

def check_load_dump_xyz_consistency(tmpdir, fn):
"""Check if dumping PDB from an xyz file results in the same data."""
mol0 = load_one(str(fn))
# write xyz file in a temporary folder & then read it
fn_tmp = os.path.join(tmpdir, 'test.pdb')
dump_one(mol0, fn_tmp)
Expand All @@ -123,15 +125,12 @@ def check_load_dump_xyz_consistency(tmpdir, fn):
assert mol1.bonds is None


def test_load_dump_xyz_consistency(tmpdir):
with as_file(files("iodata.test.data").joinpath("water.xyz")) as fn_xyz:
check_load_dump_xyz_consistency(tmpdir, fn_xyz)


def test_load_peptide_2luv():
# test pdb of small peptide
with as_file(files("iodata.test.data").joinpath("2luv.pdb")) as fn_pdb:
mol = load_one(str(fn_pdb))
with pytest.warns(FileFormatWarning) as record:
mol = load_one(str(fn_pdb))
assert len(record) == 271
assert mol.title.startswith("INTEGRIN")
assert_equal(len(mol.atnums), 547)
restypes = mol.atffparams.get('restypes')
Expand Down Expand Up @@ -218,3 +217,16 @@ def test_load_ch5plus_bonds():
with as_file(files("iodata.test.data").joinpath("ch5plus.pdb")) as fn_pdb:
mol = load_one(fn_pdb)
assert_equal(mol.bonds[:, :2], [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5]])


def test_indomethacin_dimer():
with as_file(files("iodata.test.data").joinpath("indomethacin-dimer.pdb")) as fn_pdb:
with pytest.warns(FileFormatWarning) as record:
mol = load_one(fn_pdb)
assert len(record) == 82
for item in record:
assert "Using the atom name in the PDB" in item.message.args[0]
assert mol.atnums[13] == 6
assert mol.atnums[14] == 17
assert mol.atnums[15] == 6
assert mol.atnums[55] == 17

0 comments on commit ca6113b

Please sign in to comment.