Skip to content

Commit

Permalink
Crystallography parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
samirelanduk committed May 1, 2019
1 parent d91ba93 commit 2af6e34
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 11 deletions.
23 changes: 22 additions & 1 deletion atomium/mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def mmcif_dict_to_data_dict(mmcif_dict):
"technique": None, "source_organism": None, "expression_system": None,
"missing_residues": []
}, "quality": {"resolution": None, "rvalue": None, "rfree": None},
"geometry": {"assemblies": []}, "models": []
"geometry": {"assemblies": [], "crystallography": {}}, "models": []
}
update_description_dict(mmcif_dict, data_dict)
update_experiment_dict(mmcif_dict, data_dict)
Expand Down Expand Up @@ -292,6 +292,7 @@ def update_geometry_dict(mmcif_dict, data_dict):
if assembly["software"] == "?": assembly["software"] = None
assign_metrics_to_assembly(mmcif_dict, assembly)
assign_transformations_to_assembly(mmcif_dict, operations, assembly)
update_crystallography_dict(mmcif_dict, data_dict)


def assign_metrics_to_assembly(mmcif_dict, assembly):
Expand Down Expand Up @@ -356,6 +357,26 @@ def get_operation_id_groups(expression):
return group_ids


def update_crystallography_dict(mmcif_dict, data_dict):
    """Populates the ``crystallography`` sub-dictionary inside the
    ``geometry`` section of a data dictionary from a .mmcif dictionary.

    The space group is filled in by delegating to ``mmcif_to_data_transfer``
    (using the ``symmetry`` / ``space_group_name_H-M`` names). If the .mmcif
    dictionary has a non-empty ``cell`` entry, the first row's three lengths
    and three angles are converted to floats and stored as ``unit_cell``.
    Finally, a space group of ``"NA"`` causes the whole sub-dictionary to be
    reset to an empty ``dict``.

    :param dict mmcif_dict: the .mmcif dictionary to read.
    :param dict data_dict: the data dictionary to update."""

    geometry = data_dict["geometry"]
    mmcif_to_data_transfer(
        mmcif_dict, geometry, "crystallography",
        "space_group", "symmetry", "space_group_name_H-M"
    )

    cell_table = mmcif_dict.get("cell")
    if cell_table:
        first_row = cell_table[0]
        cell_keys = (
            "length_a", "length_b", "length_c",
            "angle_alpha", "angle_beta", "angle_gamma",
        )
        unit_cell = [float(first_row[name]) for name in cell_keys]
        geometry["crystallography"]["unit_cell"] = unit_cell

    # "NA" marks the absence of real crystallographic data, so discard
    # everything gathered above (including any unit cell).
    if geometry["crystallography"].get("space_group") == "NA":
        geometry["crystallography"] = {}



def operation_id_groups_to_operations(operations, operation_id_groups):
"""Creates a list of operation matrices for an assembly, from a list of
operation IDs - cross multiplying as required.
Expand Down
13 changes: 11 additions & 2 deletions atomium/mmtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def mmtf_dict_to_data_dict(mmtf_dict):
"technique": None, "source_organism": None, "expression_system": None,
"missing_residues": []
}, "quality": {"resolution": None, "rvalue": None, "rfree": None},
"geometry": {"assemblies": []}, "models": []
"geometry": {"assemblies": [], "crystallography": {}}, "models": []
}
mmtf_to_data_transfer(mmtf_dict, data_dict,
"description", "code", "structureId")
Expand All @@ -166,6 +166,12 @@ def mmtf_dict_to_data_dict(mmtf_dict):
"quality", "rvalue", "rWork", trim=3)
mmtf_to_data_transfer(mmtf_dict, data_dict,
"quality", "rfree", "rFree", trim=3)
mmtf_to_data_transfer(mmtf_dict, data_dict["geometry"],
"crystallography", "space_group", "spaceGroup")
mmtf_to_data_transfer(mmtf_dict, data_dict["geometry"],
"crystallography", "unit_cell", "unitCell", trim=3)
if data_dict["geometry"]["crystallography"].get("space_group") == "NA":
data_dict["geometry"]["crystallography"] = {}
data_dict["geometry"]["assemblies"] = [{
"id": int(a["name"]), "software": None, "delta_energy": None,
"buried_surface_area": None, "surface_area": None, "transformations": [{
Expand Down Expand Up @@ -334,7 +340,10 @@ def mmtf_to_data_transfer(mmtf_dict, data_dict, d_cat, d_key, m_key,
value = mmtf_dict[m_key]
if date: value = datetime.strptime(value, "%Y-%m-%d").date()
if first: value = value[0]
if trim: value = round(value, trim)
if trim:
try:
value = [round(v, trim) for v in value]
except: value = round(value, trim)
data_dict[d_cat][d_key] = value
except: pass

Expand Down
19 changes: 18 additions & 1 deletion atomium/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def pdb_dict_to_data_dict(pdb_dict):
"technique": None, "source_organism": None, "expression_system": None,
"missing_residues": []
}, "quality": {"resolution": None, "rvalue": None, "rfree": None},
"geometry": {"assemblies": []}, "models": []
"geometry": {"assemblies": [], "crystallography": {}}, "models": []
}
update_description_dict(pdb_dict, data_dict)
update_experiment_dict(pdb_dict, data_dict)
Expand Down Expand Up @@ -130,6 +130,7 @@ def update_geometry_dict(pdb_dict, data_dict):
:param dict data_dict: The data dictionary to update."""

extract_assembly_remark(pdb_dict, data_dict["geometry"])
extract_crystallography(pdb_dict, data_dict["geometry"])


def update_models_list(pdb_dict, data_dict):
Expand Down Expand Up @@ -343,6 +344,22 @@ def assembly_lines_to_assembly_dict(lines):
return assembly


def extract_crystallography(pdb_dict, geometry_dict):
    """Takes a ``dict`` and adds crystallography information to it by parsing
    the first CRYST1 record.

    The space group is read from fixed columns of the record. The unit cell
    (three lengths followed by three angles) is likewise read from the
    record's fixed-width columns, so that fields which touch one another are
    still separated correctly; if the record is not column-aligned, the
    whitespace-separated tokens are used instead. Nothing is changed if there
    is no CRYST1 record.

    :param dict pdb_dict: the ``dict`` to read.
    :param dict geometry_dict: the ``dict`` to update.
    :raises ValueError: if the cell values cannot be parsed as numbers by\
    either method."""

    records = pdb_dict.get("CRYST1")
    if not records:
        return
    line = records[0]

    # Create the sub-dictionary on demand rather than failing with a KeyError
    # when the caller has not pre-populated it.
    crystallography = geometry_dict.setdefault("crystallography", {})
    crystallography["space_group"] = line[55:66].strip()

    # 0-based slices for a, b, c (9 characters each) and alpha, beta, gamma
    # (7 characters each), which directly follow the 6-character record name.
    columns = ((6, 15), (15, 24), (24, 33), (33, 40), (40, 47), (47, 54))
    try:
        unit_cell = [float(line[start:end]) for start, end in columns]
    except ValueError:
        # Record is not column-aligned (or is truncated): fall back to
        # splitting on whitespace and taking the six tokens after the name.
        unit_cell = [float(val) for val in line.split()[1:7]]
    crystallography["unit_cell"] = unit_cell


def make_sequences(pdb_dict):
"""Creates a mapping of chain IDs to sequences, by parsing SEQRES records.
Expand Down
9 changes: 8 additions & 1 deletion tests/integration/test_file_reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,11 @@ def test_1lol_data_dict(self):
"matrix": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
"vector": [0.0, 0.0, 0.0]
}]
}]})
}], "crystallography": {
"space_group": "P 1 21 1", "unit_cell": [
57.57, 55.482, 66.129, 90, 94.28, 90
]
}})
self.assertEqual(len(d["models"]), 1)
self.assertEqual(len(d["models"][0]["polymer"]), 2)
self.assertEqual(len(d["models"][0]["polymer"]["A"]["sequence"]), 229)
Expand Down Expand Up @@ -302,6 +306,9 @@ def test_5xme_data_dict(self):
self.assertEqual(d["quality"], {
"resolution": None, "rvalue": None, "rfree": None
})
self.assertEqual(d["geometry"]["crystallography"], {
"space_group": "P 1", "unit_cell": [1, 1, 1, 90, 90, 90]
} if e == "pdb" else {})
self.assertEqual(len(d["models"]), 10)
for model in d["models"][1:]:
self.assertEqual(len(model["polymer"]), len(d["models"][0]["polymer"]))
Expand Down
47 changes: 44 additions & 3 deletions tests/unit/test_mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def test_can_convert_mmcif_dict_to_data_dict(self, mock_md, mock_gm, mock_ql, mo
"technique": None, "source_organism": None, "expression_system": None,
"missing_residues": []
}, "quality": {"resolution": None, "rvalue": None, "rfree": None},
"geometry": {"assemblies": []}, "models": []
"geometry": {"assemblies": [], "crystallography": {}}, "models": []
})


Expand Down Expand Up @@ -277,15 +277,18 @@ def test_can_update_quality_dictionary(self, mock_trans):

class GeometryDictUpdatingTests(TestCase):

def test_can_update_geometry_with_nothing(self):
@patch("atomium.mmcif.update_crystallography_dict")
def test_can_update_geometry_with_nothing(self, mock_up):
m, d = {}, {"geometry": {"assemblies": []}}
update_geometry_dict(m, d)
self.assertEqual(d, {"geometry": {"assemblies": []}})
mock_up.assert_called_with(m, d)


@patch("atomium.mmcif.assign_metrics_to_assembly")
@patch("atomium.mmcif.assign_transformations_to_assembly")
def test_can_add_assemblies_to_geometry(self, mock_trans, mock_ass):
@patch("atomium.mmcif.update_crystallography_dict")
def test_can_add_assemblies_to_geometry(self, mock_up, mock_trans, mock_ass):
d = {"geometry": {"assemblies": []}}
m = {"pdbx_struct_assembly": [{
"id": "1", "method_details": "PISA",
Expand Down Expand Up @@ -322,6 +325,7 @@ def test_can_add_assemblies_to_geometry(self, mock_trans, mock_ass):
for assembly in d["geometry"]["assemblies"]:
mock_ass.assert_any_call(m, assembly)
mock_trans.assert_any_call(m, operations, assembly)
mock_up.assert_called_with(m, d)



Expand Down Expand Up @@ -480,6 +484,43 @@ def test_can_multiply_groups(self):



class CrystallographyDictUpdatingTests(TestCase):
    """Checks update_crystallography_dict with mmcif_to_data_transfer
    replaced by a mock, so only the unit cell and "NA" handling are
    exercised directly."""

    @patch("atomium.mmcif.mmcif_to_data_transfer")
    def test_can_update_crystallography_dict(self, mock_trans):
        """A ``cell`` row of numeric strings becomes a six-number unit cell."""
        cell_row = {
            "length_a": "1", "length_b": "2", "length_c": "3",
            "angle_alpha": "4", "angle_beta": "5", "angle_gamma": "6"
        }
        mmcif = {"cell": [cell_row]}
        data = {"geometry": {"crystallography": {}}}
        update_crystallography_dict(mmcif, data)
        mock_trans.assert_called_with(
            mmcif, data["geometry"], "crystallography",
            "space_group", "symmetry", "space_group_name_H-M"
        )
        unit_cell = data["geometry"]["crystallography"]["unit_cell"]
        self.assertEqual(unit_cell, [1, 2, 3, 4, 5, 6])


    @patch("atomium.mmcif.mmcif_to_data_transfer")
    def test_can_handle_missing_cell(self, mock_trans):
        """With no ``cell`` entry the sub-dictionary is left empty."""
        mmcif = {}
        data = {"geometry": {"crystallography": {}}}
        update_crystallography_dict(mmcif, data)
        mock_trans.assert_called_with(
            mmcif, data["geometry"], "crystallography",
            "space_group", "symmetry", "space_group_name_H-M"
        )
        self.assertEqual(data["geometry"]["crystallography"], {})


    @patch("atomium.mmcif.mmcif_to_data_transfer")
    def test_can_handle_NA(self, mock_trans):
        """A space group of "NA" resets the sub-dictionary to empty."""
        mmcif = {}
        data = {"geometry": {"crystallography": {"space_group": "NA"}}}
        update_crystallography_dict(mmcif, data)
        mock_trans.assert_called_with(
            mmcif, data["geometry"], "crystallography",
            "space_group", "symmetry", "space_group_name_H-M"
        )
        self.assertEqual(data["geometry"]["crystallography"], {})




class ModelsListUpdatingTests(TestCase):

@patch("atomium.mmcif.make_sequences")
Expand Down
7 changes: 6 additions & 1 deletion tests/unit/test_mmtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ def test_can_convert_mmtf_dict_to_data_dict(self, mock_up, mock_trans):
mock_trans.assert_any_call(m, d, "quality", "resolution", "resolution", trim=3)
mock_trans.assert_any_call(m, d, "quality", "rvalue", "rWork", trim=3)
mock_trans.assert_any_call(m, d, "quality", "rfree", "rFree", trim=3)
mock_trans.assert_any_call(m, d["geometry"], "crystallography", "space_group", "spaceGroup")
mock_trans.assert_any_call(m, d["geometry"], "crystallography", "unit_cell", "unitCell", trim=3)
mock_up.assert_called_with(m, d)
self.assertEqual(d, {
"description": {
Expand All @@ -236,7 +238,7 @@ def test_can_convert_mmtf_dict_to_data_dict(self, mock_up, mock_trans):
}, {
"chains": ["A", "C"], "matrix": ["ABC", "EFG", "IJK"], "vector": "DHL"
}]
}]}, "models": []
}], "crystallography": {}}, "models": []
})


Expand Down Expand Up @@ -454,6 +456,9 @@ def test_can_transfer_from_mmtf_to_data_dict_round(self):
self.assertEqual(self.d["B"][5], 10.1)
mmtf_to_data_transfer(self.m, self.d, "B", 5, "M", trim=2)
self.assertEqual(self.d["B"][5], 10.13)
self.m["M"] = [10.13122334, 1.119973]
mmtf_to_data_transfer(self.m, self.d, "B", 5, "M", trim=2)
self.assertEqual(self.d["B"][5], [10.13, 1.12])



Expand Down
25 changes: 23 additions & 2 deletions tests/unit/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def test_can_convert_pdb_dict_to_data_dict(self, mock_md, mock_gm, mock_ql, mock
"technique": None, "source_organism": None, "expression_system": None,
"missing_residues": []
}, "quality": {"resolution": None, "rvalue": None, "rfree": None},
"geometry": {"assemblies": []}, "models": []
"geometry": {"assemblies": [], "crystallography": {}}, "models": []
})


Expand Down Expand Up @@ -146,11 +146,13 @@ def test_can_update_quality_dict(self, mock_rfac, mock_res):
class GeometryDictUpdatingTests(TestCase):

@patch("atomium.pdb.extract_assembly_remark")
def test_can_update_geometry_dict(self, mock_ass):
@patch("atomium.pdb.extract_crystallography")
def test_can_update_geometry_dict(self, mock_crys, mock_ass):
d = {"geometry": "dict"}
pdb_dict = {"PDB": "DICT"}
update_geometry_dict(pdb_dict, d)
mock_ass.assert_called_with(pdb_dict, "dict")
mock_crys.assert_called_with(pdb_dict, "dict")



Expand Down Expand Up @@ -569,6 +571,25 @@ def test_can_parse_sparse_assembly(self):



class CrystallographyExtractionTests(TestCase):
    """Checks extract_crystallography with and without a CRYST1 record."""

    def test_missing_crystallography_extraction(self):
        """An input without a CRYST1 record leaves the target untouched."""
        geometry = {}
        extract_crystallography({}, geometry)
        self.assertEqual(geometry, {})


    def test_can_extract_crystallography(self):
        """Space group and unit cell are read from the CRYST1 record."""
        geometry = {"crystallography": {}}
        pdb = {"CRYST1": [
            "CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1"
        ]}
        extract_crystallography(pdb, geometry)
        expected = {"crystallography": {
            "space_group": "P 1", "unit_cell": [1, 1, 1, 90, 90, 90]
        }}
        self.assertEqual(geometry, expected)



class SequenceMakingTests(TestCase):

def test_can_make_no_sequences(self):
Expand Down

0 comments on commit 2af6e34

Please sign in to comment.