diff --git a/atomium/mmcif.py b/atomium/mmcif.py index 933549f1..5eab0814 100644 --- a/atomium/mmcif.py +++ b/atomium/mmcif.py @@ -199,7 +199,7 @@ def mmcif_dict_to_data_dict(mmcif_dict): "technique": None, "source_organism": None, "expression_system": None, "missing_residues": [] }, "quality": {"resolution": None, "rvalue": None, "rfree": None}, - "geometry": {"assemblies": []}, "models": [] + "geometry": {"assemblies": [], "crystallography": {}}, "models": [] } update_description_dict(mmcif_dict, data_dict) update_experiment_dict(mmcif_dict, data_dict) @@ -292,6 +292,7 @@ def update_geometry_dict(mmcif_dict, data_dict): if assembly["software"] == "?": assembly["software"] = None assign_metrics_to_assembly(mmcif_dict, assembly) assign_transformations_to_assembly(mmcif_dict, operations, assembly) + update_crystallography_dict(mmcif_dict, data_dict) def assign_metrics_to_assembly(mmcif_dict, assembly): @@ -356,6 +357,26 @@ def get_operation_id_groups(expression): return group_ids +def update_crystallography_dict(mmcif_dict, data_dict): + """Takes a data dictionary and updates its crystallography + sub-sub-dictionary with information from a .mmcif dictionary. + + :param dict mmcif_dict: the .mmcif dictionary to read. + :param dict data_dict: the data dictionary to update.""" + + mmcif_to_data_transfer(mmcif_dict, data_dict["geometry"], "crystallography", + "space_group", "symmetry", "space_group_name_H-M") + if mmcif_dict.get("cell"): + data_dict["geometry"]["crystallography"]["unit_cell"] = [ + float(mmcif_dict["cell"][0][key]) for key in [ + "length_a", "length_b", "length_c", "angle_alpha", "angle_beta", "angle_gamma" + ] + ] + if data_dict["geometry"]["crystallography"].get("space_group") == "NA": + data_dict["geometry"]["crystallography"] = {} + + + def operation_id_groups_to_operations(operations, operation_id_groups): """Creates a list of operation matrices for an assembly, from a list of operation IDs - cross multiplying as required. 
diff --git a/atomium/mmtf.py b/atomium/mmtf.py index f4e81cb5..b394962f 100644 --- a/atomium/mmtf.py +++ b/atomium/mmtf.py @@ -150,7 +150,7 @@ def mmtf_dict_to_data_dict(mmtf_dict): "technique": None, "source_organism": None, "expression_system": None, "missing_residues": [] }, "quality": {"resolution": None, "rvalue": None, "rfree": None}, - "geometry": {"assemblies": []}, "models": [] + "geometry": {"assemblies": [], "crystallography": {}}, "models": [] } mmtf_to_data_transfer(mmtf_dict, data_dict, "description", "code", "structureId") @@ -166,6 +166,12 @@ def mmtf_dict_to_data_dict(mmtf_dict): "quality", "rvalue", "rWork", trim=3) mmtf_to_data_transfer(mmtf_dict, data_dict, "quality", "rfree", "rFree", trim=3) + mmtf_to_data_transfer(mmtf_dict, data_dict["geometry"], + "crystallography", "space_group", "spaceGroup") + mmtf_to_data_transfer(mmtf_dict, data_dict["geometry"], + "crystallography", "unit_cell", "unitCell", trim=3) + if data_dict["geometry"]["crystallography"].get("space_group") == "NA": + data_dict["geometry"]["crystallography"] = {} data_dict["geometry"]["assemblies"] = [{ "id": int(a["name"]), "software": None, "delta_energy": None, "buried_surface_area": None, "surface_area": None, "transformations": [{ @@ -334,7 +340,10 @@ def mmtf_to_data_transfer(mmtf_dict, data_dict, d_cat, d_key, m_key, value = mmtf_dict[m_key] if date: value = datetime.strptime(value, "%Y-%m-%d").date() if first: value = value[0] - if trim: value = round(value, trim) + if trim: + try: + value = [round(v, trim) for v in value] + except: value = round(value, trim) data_dict[d_cat][d_key] = value except: pass diff --git a/atomium/pdb.py b/atomium/pdb.py index a7b6b615..32d4157b 100644 --- a/atomium/pdb.py +++ b/atomium/pdb.py @@ -76,7 +76,7 @@ def pdb_dict_to_data_dict(pdb_dict): "technique": None, "source_organism": None, "expression_system": None, "missing_residues": [] }, "quality": {"resolution": None, "rvalue": None, "rfree": None}, - "geometry": {"assemblies": []}, 
"models": [] + "geometry": {"assemblies": [], "crystallography": {}}, "models": [] } update_description_dict(pdb_dict, data_dict) update_experiment_dict(pdb_dict, data_dict) @@ -130,6 +130,7 @@ def update_geometry_dict(pdb_dict, data_dict): :param dict data_dict: The data dictionary to update.""" extract_assembly_remark(pdb_dict, data_dict["geometry"]) + extract_crystallography(pdb_dict, data_dict["geometry"]) def update_models_list(pdb_dict, data_dict): @@ -343,6 +344,22 @@ def assembly_lines_to_assembly_dict(lines): return assembly +def extract_crystallography(pdb_dict, geometry_dict): + """Takes a ``dict`` and adds crystallography information to it by parsing the + CRYST1 record. + + :param dict pdb_dict: the ``dict`` to read. + :param dict geometry_dict: the ``dict`` to update.""" + + if pdb_dict.get("CRYST1"): + line = pdb_dict["CRYST1"][0] + values = line.split() + geometry_dict["crystallography"]["space_group"] = line[55:66].strip() + geometry_dict["crystallography"]["unit_cell"] = [ + float(val) for val in values[1:7] + ] + + def make_sequences(pdb_dict): """Creates a mapping of chain IDs to sequences, by parsing SEQRES records. 
diff --git a/tests/integration/test_file_reading.py b/tests/integration/test_file_reading.py index 3e6d43bd..00dac79f 100644 --- a/tests/integration/test_file_reading.py +++ b/tests/integration/test_file_reading.py @@ -160,7 +160,11 @@ def test_1lol_data_dict(self): "matrix": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], "vector": [0.0, 0.0, 0.0] }] - }]}) + }], "crystallography": { + "space_group": "P 1 21 1", "unit_cell": [ + 57.57, 55.482, 66.129, 90, 94.28, 90 + ] + }}) self.assertEqual(len(d["models"]), 1) self.assertEqual(len(d["models"][0]["polymer"]), 2) self.assertEqual(len(d["models"][0]["polymer"]["A"]["sequence"]), 229) @@ -302,6 +306,9 @@ def test_5xme_data_dict(self): self.assertEqual(d["quality"], { "resolution": None, "rvalue": None, "rfree": None }) + self.assertEqual(d["geometry"]["crystallography"], { + "space_group": "P 1", "unit_cell": [1, 1, 1, 90, 90, 90] + } if e == "pdb" else {}) self.assertEqual(len(d["models"]), 10) for model in d["models"][1:]: self.assertEqual(len(model["polymer"]), len(d["models"][0]["polymer"])) diff --git a/tests/unit/test_mmcif.py b/tests/unit/test_mmcif.py index 6d4dc750..76f5e786 100644 --- a/tests/unit/test_mmcif.py +++ b/tests/unit/test_mmcif.py @@ -208,7 +208,7 @@ def test_can_convert_mmcif_dict_to_data_dict(self, mock_md, mock_gm, mock_ql, mo "technique": None, "source_organism": None, "expression_system": None, "missing_residues": [] }, "quality": {"resolution": None, "rvalue": None, "rfree": None}, - "geometry": {"assemblies": []}, "models": [] + "geometry": {"assemblies": [], "crystallography": {}}, "models": [] }) @@ -277,15 +277,18 @@ def test_can_update_quality_dictionary(self, mock_trans): class GeometryDictUpdatingTests(TestCase): - def test_can_update_geometry_with_nothing(self): + @patch("atomium.mmcif.update_crystallography_dict") + def test_can_update_geometry_with_nothing(self, mock_up): m, d = {}, {"geometry": {"assemblies": []}} update_geometry_dict(m, d) self.assertEqual(d, {"geometry": 
{"assemblies": []}}) + mock_up.assert_called_with(m, d) @patch("atomium.mmcif.assign_metrics_to_assembly") @patch("atomium.mmcif.assign_transformations_to_assembly") - def test_can_add_assemblies_to_geometry(self, mock_trans, mock_ass): + @patch("atomium.mmcif.update_crystallography_dict") + def test_can_add_assemblies_to_geometry(self, mock_up, mock_trans, mock_ass): d = {"geometry": {"assemblies": []}} m = {"pdbx_struct_assembly": [{ "id": "1", "method_details": "PISA", @@ -322,6 +325,7 @@ def test_can_add_assemblies_to_geometry(self, mock_trans, mock_ass): for assembly in d["geometry"]["assemblies"]: mock_ass.assert_any_call(m, assembly) mock_trans.assert_any_call(m, operations, assembly) + mock_up.assert_called_with(m, d) @@ -480,6 +484,43 @@ def test_can_multiply_groups(self): +class CrystallographyDictUpdatingTests(TestCase): + + @patch("atomium.mmcif.mmcif_to_data_transfer") + def test_can_update_crystallography_dict(self, mock_trans): + m = {"cell": [{ + "length_a": "1", "length_b": "2", "length_c": "3", + "angle_alpha": "4", "angle_beta": "5", "angle_gamma": "6" + }]} + d = {"geometry": {"crystallography": {}}} + update_crystallography_dict(m, d) + mock_trans.assert_called_with(m, d["geometry"], "crystallography", + "space_group", "symmetry", "space_group_name_H-M") + self.assertEqual(d["geometry"]["crystallography"]["unit_cell"], [1, 2, 3, 4, 5, 6]) + + + @patch("atomium.mmcif.mmcif_to_data_transfer") + def test_can_handle_missing_cell(self, mock_trans): + m = {} + d = {"geometry": {"crystallography": {}}} + update_crystallography_dict(m, d) + mock_trans.assert_called_with(m, d["geometry"], "crystallography", + "space_group", "symmetry", "space_group_name_H-M") + self.assertEqual(d["geometry"]["crystallography"], {}) + + + @patch("atomium.mmcif.mmcif_to_data_transfer") + def test_can_handle_NA(self, mock_trans): + m = {} + d = {"geometry": {"crystallography": {"space_group": "NA"}}} + update_crystallography_dict(m, d) + mock_trans.assert_called_with(m, 
d["geometry"], "crystallography", + "space_group", "symmetry", "space_group_name_H-M") + self.assertEqual(d["geometry"]["crystallography"], {}) + + + + class ModelsListUpdatingTests(TestCase): @patch("atomium.mmcif.make_sequences") diff --git a/tests/unit/test_mmtf.py b/tests/unit/test_mmtf.py index 597008a1..c6c6ea70 100644 --- a/tests/unit/test_mmtf.py +++ b/tests/unit/test_mmtf.py @@ -220,6 +220,8 @@ def test_can_convert_mmtf_dict_to_data_dict(self, mock_up, mock_trans): mock_trans.assert_any_call(m, d, "quality", "resolution", "resolution", trim=3) mock_trans.assert_any_call(m, d, "quality", "rvalue", "rWork", trim=3) mock_trans.assert_any_call(m, d, "quality", "rfree", "rFree", trim=3) + mock_trans.assert_any_call(m, d["geometry"], "crystallography", "space_group", "spaceGroup") + mock_trans.assert_any_call(m, d["geometry"], "crystallography", "unit_cell", "unitCell", trim=3) mock_up.assert_called_with(m, d) self.assertEqual(d, { "description": { @@ -236,7 +238,7 @@ def test_can_convert_mmtf_dict_to_data_dict(self, mock_up, mock_trans): }, { "chains": ["A", "C"], "matrix": ["ABC", "EFG", "IJK"], "vector": "DHL" }] - }]}, "models": [] + }], "crystallography": {}}, "models": [] }) @@ -454,6 +456,9 @@ def test_can_transfer_from_mmtf_to_data_dict_round(self): self.assertEqual(self.d["B"][5], 10.1) mmtf_to_data_transfer(self.m, self.d, "B", 5, "M", trim=2) self.assertEqual(self.d["B"][5], 10.13) + self.m["M"] = [10.13122334, 1.119973] + mmtf_to_data_transfer(self.m, self.d, "B", 5, "M", trim=2) + self.assertEqual(self.d["B"][5], [10.13, 1.12]) diff --git a/tests/unit/test_pdb.py b/tests/unit/test_pdb.py index 85862e06..5eb10f49 100644 --- a/tests/unit/test_pdb.py +++ b/tests/unit/test_pdb.py @@ -93,7 +93,7 @@ def test_can_convert_pdb_dict_to_data_dict(self, mock_md, mock_gm, mock_ql, mock "technique": None, "source_organism": None, "expression_system": None, "missing_residues": [] }, "quality": {"resolution": None, "rvalue": None, "rfree": None}, - "geometry": 
{"assemblies": []}, "models": [] + "geometry": {"assemblies": [], "crystallography": {}}, "models": [] }) @@ -146,11 +146,13 @@ def test_can_update_quality_dict(self, mock_rfac, mock_res): class GeometryDictUpdatingTests(TestCase): @patch("atomium.pdb.extract_assembly_remark") - def test_can_update_geometry_dict(self, mock_ass): + @patch("atomium.pdb.extract_crystallography") + def test_can_update_geometry_dict(self, mock_crys, mock_ass): d = {"geometry": "dict"} pdb_dict = {"PDB": "DICT"} update_geometry_dict(pdb_dict, d) mock_ass.assert_called_with(pdb_dict, "dict") + mock_crys.assert_called_with(pdb_dict, "dict") @@ -569,6 +571,25 @@ def test_can_parse_sparse_assembly(self): +class CrystallographyExtractionTests(TestCase): + + def test_missing_crystallography_extraction(self): + d = {} + extract_crystallography({}, d) + self.assertEqual(d, {}) + + + def test_can_extract_crystallography(self): + d = {"crystallography": {}} + extract_crystallography({"CRYST1": [ + "CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1" + ]}, d) + self.assertEqual(d, {"crystallography": { + "space_group": "P 1", "unit_cell": [1, 1, 1, 90, 90, 90]} + }) + + + class SequenceMakingTests(TestCase): def test_can_make_no_sequences(self):