Skip to content

Commit

Permalink
Add parsing of anisotropic temperature factors from .pdb files.
Browse files Browse the repository at this point in the history
  • Loading branch information
samirelanduk committed May 23, 2018
1 parent c4307f5 commit 24b7659
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 21 deletions.
2 changes: 1 addition & 1 deletion atomium/files/pdbdict2pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def atom_dict_to_atom(atom_dict):
return Atom(
atom_dict["element"], atom_dict["x"], atom_dict["y"], atom_dict["z"],
id=atom_dict["atom_id"], name=atom_dict["atom_name"],
charge=atom_dict["charge"],
charge=atom_dict["charge"], anisotropy=atom_dict["anisotropy"],
bfactor=atom_dict["temp_factor"] if atom_dict["temp_factor"] else 0
)

Expand Down
33 changes: 27 additions & 6 deletions atomium/files/pdbstring2pdbdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,32 +162,38 @@ def extract_structure(pdb_dict, lines):
model_lines = get_lines("MODEL", lines, number=True)
atom_lines = get_lines("ATOM", lines, number=bool(model_lines))
hetatm_lines = get_lines("HETATM", lines, number=bool(model_lines))
anisou_lines = get_lines("ANISOU", lines, number=bool(model_lines))
conect_lines = get_lines("CONECT", lines)
pdb_dict["models"] = []
if model_lines:
for index, (line, line_number) in enumerate(model_lines):
next_line_number = model_lines[index + 1][1] if (
index < len(model_lines) - 1
) else len(lines)
model_atoms = [line for line, number in atom_lines
m_atoms = [line for line, number in atom_lines
if line_number < number < next_line_number]
model_h_atms = [line for line, number in hetatm_lines
m_het = [line for line, number in hetatm_lines
if line_number < number < next_line_number]
pdb_dict["models"].append(lines_to_model(model_atoms, model_h_atms))
m_anisou = [line for line, number in anisou_lines
if line_number < number < next_line_number]
pdb_dict["models"].append(lines_to_model(m_atoms, m_het, m_anisou))
else:
pdb_dict["models"].append(lines_to_model(atom_lines, hetatm_lines))
pdb_dict["models"].append(
lines_to_model(atom_lines, hetatm_lines, anisou_lines)
)
extract_connections(pdb_dict, conect_lines)


def lines_to_model(atom_lines, hetatm_lines):
"""Creates a model ``dict`` from ATOM lines and HETATM lines.
def lines_to_model(atom_lines, hetatm_lines, anisou_lines):
"""Creates a model ``dict`` from ATOM, ANISOU and HETATM lines.
:param list atom_lines: the ATOM lines.
:param list hetatm_lines: the HETATM lines.
:rtype: ``dict``"""

atoms = [atom_line_to_atom_dict(line) for line in atom_lines]
heteroatoms = [atom_line_to_atom_dict(line) for line in hetatm_lines]
assign_anisou(anisou_lines, atoms, heteroatoms)
molecules = atoms_to_residues(heteroatoms)
chains = atoms_to_chains(atoms)
model = {"molecules": molecules, "chains": chains}
Expand Down Expand Up @@ -264,6 +270,21 @@ def atoms_to_chains(atoms):
return chains


def assign_anisou(anisou_lines, atoms, heteroatoms):
"""Takes a list of ANISOU records, and atom and heteroatom lists, and
assigns anisotropy information the atoms and heteroatoms.
:param list anisou_lines: The ANISOU records.
:param list atoms: The atom ``dict`` objects to update.
:param list heteroatoms: The heteroatom ``dict`` objects to update."""

anisotropy = {int(line[6:11].strip()): [
int(line[n * 7 + 28:n * 7 + 35]) / 10000 for n in range(6)
] for line in anisou_lines}
for atom in atoms + heteroatoms:
atom["anisotropy"] = anisotropy.get(atom["atom_id"], [0, 0, 0, 0, 0, 0])


def extract_connections(pdb_dict, lines):
"""Takes a ``dict`` and adds connection information to it by parsing conect
lines.
Expand Down
1 change: 1 addition & 0 deletions tests/integration/files/1lol.pdb
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ SCALE1 0.017370 0.000000 0.001301 0.00000
SCALE2 0.000000 0.018024 0.000000 0.00000
SCALE3 0.000000 0.000000 0.015164 0.00000
ATOM 1 N VAL A 11 3.696 33.898 63.219 1.00 21.50 N
ANISOU 1 N VAL A 11 2406 1892 1614 198 519 -328 N
ATOM 2 CA VAL A 11 3.198 33.218 61.983 1.00 19.76 C
ATOM 3 C VAL A 11 3.914 31.863 61.818 1.00 19.29 C
ATOM 4 O VAL A 11 5.132 31.792 61.932 1.00 19.78 O
Expand Down
8 changes: 7 additions & 1 deletion tests/integration/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ def test_can_read_pdb(self):
model = pdb.model
self.assertEqual(len(model.atoms()), 3431)
atom = model.atom(2934)
self.assertEqual(atom.anisotropy, [0, 0, 0, 0, 0, 0])
self.assertEqual(
model.atom(1).anisotropy,
[0.2406, 0.1892, 0.1614, 0.0198, 0.0519, -0.0328]
)
self.assertEqual(atom.element, "N")
self.assertEqual(atom.name, "NE")
self.assertEqual(atom.location, (-20.082, 79.647, 41.645))
Expand Down Expand Up @@ -204,6 +209,7 @@ def test_can_read_pdb_data(self):
"x": 3.696, "y": 33.898, "z": 63.219,
"occupancy": 1.0, "temp_factor": 21.50,
"element": "N", "charge": 0.0,
"anisotropy": [0.2406, 0.1892, 0.1614, 0.0198, 0.0519, -0.0328]
}
)
self.assertEqual(len(data_file["connections"]), 60)
Expand Down Expand Up @@ -234,7 +240,7 @@ def test_can_fetch_pdb_data(self):
"chain_id": "A", "residue_id": 11, "insert_code": "",
"x": 3.696, "y": 33.898, "z": 63.219,
"occupancy": 1.0, "temp_factor": 21.50,
"element": "N", "charge": 0.0,
"element": "N", "charge": 0.0, "anisotropy": [0, 0, 0, 0, 0, 0]
}
)
self.assertEqual(len(data_file["connections"]), 60)
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/files_tests/test_pdb_dict_to_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def setUp(self):
"chain_id": "B", "residue_id": 13, "insert_code": "C",
"x": 12.681, "y": 37.302, "z": -25.211,
"occupancy": 0.5, "temp_factor": 15.56,
"element": "N", "charge": -2
"element": "N", "charge": -2, "anisotropy": [1, 2, 3, 4, 5, 6]
}

@patch("atomium.files.pdbdict2pdb.Atom")
Expand All @@ -185,7 +185,7 @@ def test_can_convert_atom_dict_to_atom(self, mock_atom):
returned_atom = atom_dict_to_atom(self.atom_dict)
self.assertIs(returned_atom, atom)
mock_atom.assert_called_with(
"N", 12.681, 37.302, -25.211,
"N", 12.681, 37.302, -25.211, anisotropy=[1, 2, 3, 4, 5, 6],
id=107, name="N1", charge=-2, bfactor=15.56
)

Expand All @@ -197,7 +197,7 @@ def test_can_convert_atom_dict_to_atom_with_no_temp_factor(self, mock_atom):
self.atom_dict["temp_factor"] = None
returned_atom = atom_dict_to_atom(self.atom_dict)
mock_atom.assert_called_with(
"N", 12.681, 37.302, -25.211,
"N", 12.681, 37.302, -25.211, anisotropy=[1, 2, 3, 4, 5, 6],
id=107, name="N1", charge=-2, bfactor=0
)

Expand Down
48 changes: 38 additions & 10 deletions tests/unit/files_tests/test_pdb_string_to_pdb_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ class StructureExtractionTests(TestCase):
def setUp(self):
self.pdb_dict = {}
self.lines = [
"model1", "atom1", "hetam1", "model2", "atom2", "hetam2", "con1", "con2"
"model1", "atom1", "an1", "hetam1", "model2", "atom2", "an2", "hetam2", "con1", "con2"
]


Expand All @@ -317,15 +317,16 @@ def setUp(self):
@patch("atomium.files.pdbstring2pdbdict.lines_to_model")
def test_can_extract_structure_one_model(self, mock_model, mock_con, mock_lines):
mock_lines.side_effect = [
[], ["atom1", "atom2"], ["hetatm1", "hetatm2"], ["con1", "con2"]
[], ["atom1", "atom2"], ["hetatm1", "hetatm2"], ["an1", "an2"], ["con1", "con2"]
]
mock_model.return_value = {"model": "1"}
extract_structure(self.pdb_dict, self.lines)
mock_lines.assert_any_call("MODEL", self.lines, number=True)
mock_lines.assert_any_call("ATOM", self.lines, number=False)
mock_lines.assert_any_call("HETATM", self.lines, number=False)
mock_lines.assert_any_call("ANISOU", self.lines, number=False)
mock_lines.assert_any_call("CONECT", self.lines)
mock_model.assert_called_with(["atom1", "atom2"], ["hetatm1", "hetatm2"])
mock_model.assert_called_with(["atom1", "atom2"], ["hetatm1", "hetatm2"], ["an1", "an2"])
mock_con.assert_called_with(self.pdb_dict, ["con1", "con2"])
self.assertEqual(self.pdb_dict["models"], [{"model": "1"}])

Expand All @@ -335,38 +336,45 @@ def test_can_extract_structure_one_model(self, mock_model, mock_con, mock_lines)
@patch("atomium.files.pdbstring2pdbdict.lines_to_model")
def test_can_extract_structure_multiple_models(self, mock_model, mock_con, mock_lines):
mock_lines.side_effect = [
[[self.lines[0], 0], [self.lines[3], 3]], [[self.lines[1], 1], [self.lines[4], 4]],
[[self.lines[2], 2], [self.lines[5], 5]], [self.lines[6], self.lines[7]],
[[self.lines[0], 0], [self.lines[4], 4]], [[self.lines[1], 1], [self.lines[5], 5]],
[[self.lines[3], 3], [self.lines[7], 7]], [[self.lines[2], 2], [self.lines[6], 6]],
[self.lines[8], self.lines[9]],
]
mock_model.side_effect = [{"model": "1"}, {"model": "2"}]
extract_structure(self.pdb_dict, self.lines)
mock_lines.assert_any_call("MODEL", self.lines, number=True)
mock_lines.assert_any_call("ATOM", self.lines, number=True)
mock_lines.assert_any_call("HETATM", self.lines, number=True)
mock_lines.assert_any_call("CONECT", self.lines)
mock_model.assert_any_call([self.lines[1]], [self.lines[2]])
mock_model.assert_any_call([self.lines[4]], [self.lines[5]])
mock_con.assert_called_with(self.pdb_dict, self.lines[6:8])
mock_model.assert_any_call([self.lines[1]], [self.lines[3]], [self.lines[2]])
mock_model.assert_any_call([self.lines[5]], [self.lines[7]], [self.lines[6]])
mock_con.assert_called_with(self.pdb_dict, self.lines[8:10])
self.assertEqual(self.pdb_dict["models"], [{"model": "1"}, {"model": "2"}])



class LinesToModelTests(TestCase):

@patch("atomium.files.pdbstring2pdbdict.atom_line_to_atom_dict")
@patch("atomium.files.pdbstring2pdbdict.assign_anisou")
@patch("atomium.files.pdbstring2pdbdict.atoms_to_residues")
@patch("atomium.files.pdbstring2pdbdict.atoms_to_chains")
def test_can_convert_lines_to_model(self, mock_chain, mock_res, mock_dict):
def test_can_convert_lines_to_model(self, mock_chain, mock_res, mock_an, mock_dict):
mock_dict.side_effect = [
{"a": 1}, {"a": 2}, {"a": 3}, {"a": 4},
{"h": 1}, {"h": 2}, {"h": 3}, {"h": 4}
]
mock_res.return_value = [{"m": 1}, {"m": 2}]
mock_chain.return_value = [{"c": 1}, {"c": 2}]
model = lines_to_model(["a1", "a2", "a3", "a4"], ["h1", "h2", "h3", "h4"])
model = lines_to_model(["a1", "a2", "a3", "a4"], ["h1", "h2", "h3", "h4"], ["an1", "an2"])
for char in ["a", "h"]:
for num in ["1", "2", "3", "4"]:
mock_dict.assert_any_call(char + num)
mock_an.assert_called_with(
["an1", "an2"],
[{"a": 1}, {"a": 2}, {"a": 3}, {"a": 4}],
[{"h": 1}, {"h": 2}, {"h": 3}, {"h": 4}]
)
mock_res.assert_called_with([{"h": 1}, {"h": 2}, {"h": 3}, {"h": 4}])
mock_chain.assert_called_with([{"a": 1}, {"a": 2}, {"a": 3}, {"a": 4}])
self.assertEqual(model, {
Expand Down Expand Up @@ -455,6 +463,26 @@ def test_can_convert_atoms_to_chains(self, mock_res):



class AnisouAssingingTests(TestCase):

def test_can_assign_anisou(self):
atoms = [{"atom_id": 107}, {"atom_id": 108}]
heteroatoms = [{"atom_id": 109}, {"atom_id": 110}]
assign_anisou([
"ANISOU 107 N GLY A 13 2406 1892 1614 198 519 -328",
"ANISOU 110 O GLY A 13 3837 2505 1611 164 -121 189"
], atoms, heteroatoms)
self.assertEqual(atoms, [
{"atom_id": 107, "anisotropy": [0.2406, 0.1892, 0.1614, 0.0198, 0.0519, -0.0328]},
{"atom_id": 108, "anisotropy": [0, 0, 0, 0, 0, 0]}
])
self.assertEqual(heteroatoms, [
{"atom_id": 109, "anisotropy": [0, 0, 0, 0, 0, 0]},
{"atom_id": 110, "anisotropy": [0.3837, 0.2505, 0.1611, 0.0164, -0.0121, 0.0189]}
])



class ConnectionExtractionTests(TestCase):

def test_extract_connections(self):
Expand Down

0 comments on commit 24b7659

Please sign in to comment.