From 638e26223560e74200920f2a9bd5284fd10601e4 Mon Sep 17 00:00:00 2001 From: Sam Ireland Date: Mon, 17 Sep 2018 18:40:29 +0100 Subject: [PATCH] .mmtf header reading --- atomium/mmtf.py | 56 ++++++++++++++++++++++ atomium/utilities.py | 4 +- tests/integration/test_file_reading.py | 23 +++++++++ tests/unit/test_mmcif.py | 7 +++ tests/unit/test_mmtf.py | 64 ++++++++++++++++++++++++++ 5 files changed, 152 insertions(+), 2 deletions(-) diff --git a/atomium/mmtf.py b/atomium/mmtf.py index 3c5e14ba..41253068 100644 --- a/atomium/mmtf.py +++ b/atomium/mmtf.py @@ -3,6 +3,7 @@ import msgpack import struct from collections import deque +from datetime import datetime def mmtf_bytes_to_mmtf_dict(bytestring): """Takes the raw bytestring of a .mmtf file and turns it into a normal, @@ -129,3 +130,58 @@ def recursive_decode(integers, bits=16): index += 1 new.append(value) return new + + +def mmtf_dict_to_data_dict(mmtf_dict): + """Converts an .mmtf dictionary into an atomium data dictionary, with the + same standard layout that the other file formats get converted into. + + :param dict mmtf_dict: the .mmtf dictionary. 
+ :rtype: ``dict``""" + + data_dict = { + "description": { + "code": None, "title": None, "deposition_date": None, + "classification": None, "keywords": [], "authors": [] + }, "experiment": { + "technique": None, "source_organism": None, "expression_system": None + }, "quality": {"resolution": None, "rvalue": None, "rfree": None} + } + mmtf_to_data_transfer(mmtf_dict, data_dict, + "description", "code", "structureId") + mmtf_to_data_transfer(mmtf_dict, data_dict, + "description", "title", "title") + mmtf_to_data_transfer(mmtf_dict, data_dict, + "description", "deposition_date", "depositionDate", date=True) + mmtf_to_data_transfer(mmtf_dict, data_dict, + "experiment", "technique", "experimentalMethods", first=True) + mmtf_to_data_transfer(mmtf_dict, data_dict, + "quality", "resolution", "resolution", trim=3) + mmtf_to_data_transfer(mmtf_dict, data_dict, + "quality", "rvalue", "rWork", trim=3) + mmtf_to_data_transfer(mmtf_dict, data_dict, + "quality", "rfree", "rFree", trim=3) + return data_dict + + +def mmtf_to_data_transfer(mmtf_dict, data_dict, d_cat, d_key, m_key, + date=False, first=False, trim=False): + """A function for transferring a bit of data from a .mmtf dictionary to a + data dictionary, or doing nothing if the data doesn't exist. + + :param dict mmtf_dict: the .mmtf dictionary to read. + :param dict data_dict: the data dictionary to update. + :param str d_cat: the top-level key in the data dictionary. + :param str d_key: the data dictionary field to update. + :param str m_key: the .mmtf field to read. + :param bool date: if True, the value will be converted to a date. + :param bool first: if True, the value's first item will be used. 
+ :param int trim: if given, the value will be rounded by this amount.""" + + try: + value = mmtf_dict[m_key] + if date: value = datetime.strptime(value, "%Y-%m-%d").date() + if first: value = value[0] + if trim: value = round(value, trim) + data_dict[d_cat][d_key] = value + except: pass diff --git a/atomium/utilities.py b/atomium/utilities.py index c52b56b2..9ffa94cf 100644 --- a/atomium/utilities.py +++ b/atomium/utilities.py @@ -3,7 +3,7 @@ import builtins from requests import get from .mmcif import mmcif_string_to_mmcif_dict, mmcif_dict_to_data_dict -from .mmtf import mmtf_bytes_to_mmtf_dict +from .mmtf import mmtf_bytes_to_mmtf_dict, mmtf_dict_to_data_dict from .pdb import pdb_string_to_pdb_dict def open(path, *args, **kwargs): @@ -43,6 +43,6 @@ def get_parse_functions(filestring, path): if ending in ("mmtf", "cif", "pdb"): return { "cif": (mmcif_string_to_mmcif_dict, mmcif_dict_to_data_dict), - "mmtf": (mmtf_bytes_to_mmtf_dict, None), + "mmtf": (mmtf_bytes_to_mmtf_dict, mmtf_dict_to_data_dict), "pdb": (pdb_string_to_pdb_dict, None) }[ending] diff --git a/tests/integration/test_file_reading.py b/tests/integration/test_file_reading.py index 2e6dfc10..ce7af440 100644 --- a/tests/integration/test_file_reading.py +++ b/tests/integration/test_file_reading.py @@ -88,6 +88,29 @@ def test_1igt_file_dict(self): self.assertEqual(d["insCodeList"][266], "A") + def test_1lol_data_dict(self): + d = atomium.open("tests/integration/files/1lol.mmtf", data_dict=True) + self.assertEqual(set(d.keys()), { + "description", "experiment", "quality" + }) + self.assertEqual(d["description"], { + "code": "1LOL", + "title": "Crystal structure of orotidine monophosphate decarboxylase complex with XMP", + "deposition_date": date(2002, 5, 6), + "classification": None, + "keywords": [], + "authors": [] + }) + self.assertEqual(d["experiment"], { + "technique": "X-RAY DIFFRACTION", + "source_organism": None, + "expression_system": None + }) + self.assertEqual(d["quality"], { + "resolution": 
1.9, "rvalue": 0.193, "rfree": 0.229 + }) + + class PdbReadingTests(TestCase): diff --git a/tests/unit/test_mmcif.py b/tests/unit/test_mmcif.py index 881a24d0..c8345c9f 100644 --- a/tests/unit/test_mmcif.py +++ b/tests/unit/test_mmcif.py @@ -252,6 +252,13 @@ def setUp(self): self.m = {"M": [{10: "ten", 11: "2018-09-17"}], "N": [{12: "tw"}, {12: "tw2"}]} + def test_can_do_nothing(self): + mmcif_to_data_transfer(self.m, self.d, "A", 1, "X", 10) + self.assertEqual(self.d["A"][1], None) + mmcif_to_data_transfer(self.m, self.d, "A", 1, "M", 100) + self.assertEqual(self.d["A"][1], None) + + def test_can_transfer_from_mmcif_to_data_dict(self): mmcif_to_data_transfer(self.m, self.d, "A", 1, "M", 10) self.assertEqual(self.d["A"][1], "ten") diff --git a/tests/unit/test_mmtf.py b/tests/unit/test_mmtf.py index 6868f20e..f7e49e8b 100644 --- a/tests/unit/test_mmtf.py +++ b/tests/unit/test_mmtf.py @@ -1,5 +1,6 @@ from collections import deque import struct +from datetime import date from unittest import TestCase from unittest.mock import Mock, patch, PropertyMock, MagicMock from atomium.mmtf import * @@ -195,3 +196,66 @@ def test_can_recursive_decode_other_powers(self): self.assertEqual(recursive_decode([ 127, 41, 34, 1, 0, -50, -128, 0, 7, 127, 0, 127, 127, 14 ], bits=8), [168, 34, 1, 0, -50, -128, 7, 127, 268]) + + + +class MmtfDictToDataDictTests(TestCase): + + @patch("atomium.mmtf.mmtf_to_data_transfer") + def test_can_convert_mmtf_dict_to_data_dict(self, mock_trans): + m = {"A": "B"} + d = mmtf_dict_to_data_dict(m) + mock_trans.assert_any_call(m, d, "description", "code", "structureId") + mock_trans.assert_any_call(m, d, "description", "title", "title") + mock_trans.assert_any_call(m, d, "description", "deposition_date", "depositionDate", date=True) + mock_trans.assert_any_call(m, d, "experiment", "technique", "experimentalMethods", first=True) + mock_trans.assert_any_call(m, d, "quality", "resolution", "resolution", trim=3) + mock_trans.assert_any_call(m, d, "quality", 
"rvalue", "rWork", trim=3) + mock_trans.assert_any_call(m, d, "quality", "rfree", "rFree", trim=3) + self.assertEqual(d, { + "description": { + "code": None, "title": None, "deposition_date": None, + "classification": None, "keywords": [], "authors": [] + }, "experiment": { + "technique": None, "source_organism": None, "expression_system": None + }, "quality": {"resolution": None, "rvalue": None, "rfree": None} + }) + + + +class MmtfDictTransferTests(TestCase): + + def setUp(self): + self.d = {"A": {1: None, 2: None, 3: None}, "B": {4: None, 5: None}} + self.m = {"M": 10.127, "C": 100, "D": "2018-09-17", "L": [8, 9]} + + + def test_can_do_nothing(self): + mmtf_to_data_transfer(self.m, self.d, "A", 1, "X", 10) + self.assertEqual(self.d["A"][1], None) + mmtf_to_data_transfer(self.m, self.d, "A", 1, "M", 100) + self.assertEqual(self.d["A"][1], None) + + + def test_can_transfer_from_mmtf_to_data_dict(self): + mmtf_to_data_transfer(self.m, self.d, "A", 1, "M") + self.assertEqual(self.d["A"][1], 10.127) + mmtf_to_data_transfer(self.m, self.d, "B", 5, "C") + self.assertEqual(self.d["B"][5], 100) + + + def test_can_transfer_from_mmtf_to_data_dict_date(self): + mmtf_to_data_transfer(self.m, self.d, "B", 5, "D", date=True) + self.assertEqual(self.d["B"][5], date(2018, 9, 17)) + + + def test_can_transfer_from_mmtf_to_data_dict_first(self): + mmtf_to_data_transfer(self.m, self.d, "B", 5, "L", first=True) + self.assertEqual(self.d["B"][5], 8) + + + def test_can_transfer_from_mmtf_to_data_dict_round(self): + mmtf_to_data_transfer(self.m, self.d, "B", 5, "M", trim=1) + self.assertEqual(self.d["B"][5], 10.1) + mmtf_to_data_transfer(self.m, self.d, "B", 5, "M", trim=2) + self.assertEqual(self.d["B"][5], 10.13)