Skip to content

Commit

Permalink
Add parsing of extra R-factor information (implements #15)
Browse files Browse the repository at this point in the history
  • Loading branch information
samirelanduk committed Jun 13, 2018
1 parent c782d8e commit ac4a725
Show file tree
Hide file tree
Showing 12 changed files with 136 additions and 16 deletions.
30 changes: 30 additions & 0 deletions atomium/files/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def __init__(self):
self._technique = None
self._classification = None
self._rfactor = None
self._rfree = None
self._rcount = None
self._keywords = []


Expand Down Expand Up @@ -114,6 +116,34 @@ def rfactor(self, rfactor):
self._rfactor = rfactor


@property
def rfree(self):
    """Read access to the Free R-factor held on this Pdb.

    Returns whatever is currently stored in ``_rfree``; no conversion or
    validation is applied.

    :rtype: ``float``"""

    return self._rfree


@rfree.setter
def rfree(self, rfree):
    # Stored exactly as supplied - the setter performs no checking.
    self._rfree = rfree


@property
def rcount(self):
    """Read access to the R-factor test set count held on this Pdb.

    Returns whatever is currently stored in ``_rcount``; no conversion or
    validation is applied.

    :rtype: ``float``"""

    return self._rcount


@rcount.setter
def rcount(self, rcount):
    # Stored exactly as supplied - the setter performs no checking.
    self._rcount = rcount


@property
def organism(self):
"""The Pdb's source organism.
Expand Down
5 changes: 4 additions & 1 deletion atomium/files/pdb2pdbdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def pdb_to_pdb_dict(pdb):
pdb_dict["title"] = pdb._title
pdb_dict["resolution"] = pdb._resolution
pdb_dict["rfactor"] = pdb._rfactor
pdb_dict["rfree"] = pdb._rfree
pdb_dict["rcount"] = pdb._rcount
pdb_dict["organism"] = pdb._organism
pdb_dict["expression_system"] = pdb._expression_system
pdb_dict["technique"] = pdb._technique
Expand Down Expand Up @@ -48,7 +50,8 @@ def structure_to_pdb_dict(structure):
"models": [model], "connections": connections,
"deposition_date": None, "code": None, "title": None, "resolution": None,
"organism": None, "expression_system": None, "technique": None,
"classification": None, "rfactor": None, "keywords": [], "sequences": {}
"classification": None, "rfactor": None, "rfree": None, "rcount": None,
"keywords": [], "sequences": {}
}


Expand Down
2 changes: 2 additions & 0 deletions atomium/files/pdbdict2pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def pdb_dict_to_pdb(pdb_dict):
pdb._technique = pdb_dict["technique"]
pdb._classification = pdb_dict["classification"]
pdb._rfactor = pdb_dict["rfactor"]
pdb._rfree = pdb_dict["rfree"]
pdb._rcount = pdb_dict["rcount"]
pdb._keywords = pdb_dict["keywords"]
pdb._models = [model_dict_to_model(
d, pdb_dict["connections"], pdb_dict["sequences"]
Expand Down
11 changes: 10 additions & 1 deletion atomium/files/pdbdict2pdbstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,20 @@ def pack_rfactor(lines, pdb_dict):
:param list lines: The record lines to add to.
:param dict pdb_dict: The data dictionary to pack."""

if pdb_dict["rfactor"] is not None:
if any([pdb_dict["rfactor"], pdb_dict["rfree"], pdb_dict["rcount"]]):
lines.append("REMARK 3".ljust(80))
if pdb_dict["rfactor"] is not None:
lines.append("REMARK 3 R VALUE (WORKING SET) : {}".format(
pdb_dict["rfactor"]
).ljust(80))
if pdb_dict["rfree"] is not None:
lines.append("REMARK 3 FREE R VALUE : {}".format(
pdb_dict["rfree"]
).ljust(80))
if pdb_dict["rcount"] is not None:
lines.append("REMARK 3 FREE R VALUE TEST SET COUNT : {}".format(
int(pdb_dict["rcount"])
).ljust(80))


def pack_source(lines, pdb_dict):
Expand Down
28 changes: 17 additions & 11 deletions atomium/files/pdbstring2pdbdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,23 @@ def extract_rfactor(pdb_dict, lines):
:param list lines: the file lines to read from."""

remark_lines = get_lines("REMARK", lines)
pattern = r"R VALUE[ ]{2,}\(WORKING SET\) : (.+)"
for remark in remark_lines:
if int(remark[7:10]) == 3 and remark[10:].strip():
matches = re.findall(pattern, remark)
if matches:
try:
pdb_dict["rfactor"] = float(matches[0].strip())
break
except: pass
else:
pdb_dict["rfactor"] = None
patterns = {
"rfactor": r"R VALUE[ ]{2,}\(WORKING SET\) : (.+)",
"rfree": r"FREE R VALUE[ ]{2,}: (.+)",
"rcount": r"FREE R VALUE TEST SET COUNT[ ]{2,}: (.+)"
}
for attribute, pattern in patterns.items():
for remark in remark_lines:
if int(remark[7:10]) == 3 and remark[10:].strip():
matches = re.findall(pattern, remark[10:].strip())
if matches:
try:
pdb_dict[attribute] = float(matches[0].strip())
break
except: pass
for attr in patterns:
if attr not in pdb_dict:
pdb_dict[attr] = None


def extract_source(pdb_dict, lines):
Expand Down
2 changes: 2 additions & 0 deletions tests/integration/files/1lol_output.pdb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ REMARK 2
REMARK 2 RESOLUTION. 1.90 ANGSTROMS.
REMARK 3
REMARK 3 R VALUE (WORKING SET) : 0.193
REMARK 3 FREE R VALUE : 0.229
REMARK 3 FREE R VALUE TEST SET COUNT : 1583
SEQRES 1 A 229 LEU ARG SER ARG ARG VAL ASP VAL MET ASP VAL MET ASN
SEQRES 2 A 229 ARG LEU ILE LEU ALA MET ASP LEU MET ASN ARG ASP ASP
SEQRES 3 A 229 ALA LEU ARG VAL THR GLY GLU VAL ARG GLU TYR ILE ASP
Expand Down
2 changes: 2 additions & 0 deletions tests/integration/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ def test_can_read_pdb(self):
self.assertEqual(pdb.classification, "LYASE")
self.assertEqual(pdb.resolution, 1.9)
self.assertEqual(pdb.rfactor, 0.193)
self.assertEqual(pdb.rfree, 0.229)
self.assertEqual(pdb.rcount, 1583)
self.assertEqual(pdb.keywords, ["TIM BARREL", "LYASE"])

# Atoms are correct
Expand Down
4 changes: 3 additions & 1 deletion tests/unit/files_tests/test_pdb_dict_to_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_can_convert_pdb_dict_to_pdb(self, mock_model, mock_pdb):
"deposition_date": "D", "code": "C", "title": "T", "resolution": 1.4,
"organism": "H. sap", "expression_system": "M. mus",
"technique": "TECHNIQUE", "classification": "CLASS", "rfactor": 4.5,
"keywords": ["A", "B"],
"keywords": ["A", "B"], "rfree": 0.3, "rcount": 50,
"models": ["1", "2", "3"],
"connections": ["c1", "c2"],
"sequences": {"A": "SEQUENCE"}
Expand All @@ -34,6 +34,8 @@ def test_can_convert_pdb_dict_to_pdb(self, mock_model, mock_pdb):
self.assertEqual(returned_pdb._technique, "TECHNIQUE")
self.assertEqual(returned_pdb._classification, "CLASS")
self.assertEqual(returned_pdb._rfactor, 4.5)
self.assertEqual(returned_pdb._rfree, 0.3)
self.assertEqual(returned_pdb._rcount, 50)
self.assertEqual(returned_pdb._keywords, ["A", "B"])
self.assertEqual(returned_pdb._models, ["model1", "model2", "model3"])

Expand Down
24 changes: 22 additions & 2 deletions tests/unit/files_tests/test_pdb_dict_to_pdb_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def setUp(self):
"deposition_date": datetime(1990, 9, 1).date(),
"code": "1XYZ", "title": "ABC" * 40, "resolution": 1.9,
"technique": "TECH", "classification": "CLASS", "rfactor": 3.4,
"keywords": ["AA", "BBB", "CCCCC"],
"keywords": ["AA", "BBB", "CCCCC"], "rfree": 2.3, "rcount": 19,
"organism": "HOMO SAPIENS", "expression_system": "MUS MUSCULUS",
"sequences": {"A": ["AAA", "BBB"], "C": ["CCC"]}
}
Expand Down Expand Up @@ -128,16 +128,36 @@ def test_can_pack_no_resolution(self):

class RfactorPackingTests(PdbStringCreationTest):

def test_can_pack_resolution(self):
def test_can_pack_rfactor(self):
    """With all three values present, a header line and one line per
    value are appended, in that order."""

    pack_rfactor(self.lines, self.pdb_dict)
    expected = [
        "REMARK 3" + " " * 70,
        "REMARK 3 R VALUE (WORKING SET) : 3.4".ljust(80),
        "REMARK 3 FREE R VALUE : 2.3".ljust(80),
        "REMARK 3 FREE R VALUE TEST SET COUNT : 19".ljust(80),
    ]
    # Checked position by position so a failure names the offending line.
    for index, line in enumerate(expected):
        self.assertEqual(self.lines[index], line)


def test_can_pack_partial_rfactor(self):
    """With only ``rfactor`` set, just the header and the working-set
    line are appended."""

    for missing in ("rfree", "rcount"):
        self.pdb_dict[missing] = None
    pack_rfactor(self.lines, self.pdb_dict)
    expected = [
        "REMARK 3" + " " * 70,
        "REMARK 3 R VALUE (WORKING SET) : 3.4".ljust(80),
    ]
    for index, line in enumerate(expected):
        self.assertEqual(self.lines[index], line)
    # Nothing beyond those two lines may have been written.
    self.assertEqual(len(self.lines), 2)



def test_can_pack_no_rfactor(self):
    """With every R-factor value absent, no lines are appended."""

    for missing in ("rfactor", "rfree", "rcount"):
        self.pdb_dict[missing] = None
    pack_rfactor(self.lines, self.pdb_dict)
    self.assertEqual(self.lines, [])

Expand Down
2 changes: 2 additions & 0 deletions tests/unit/files_tests/test_pdb_string_to_pdb_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ def test_rfactor_extraction(self, mock_lines):
extract_rfactor(self.pdb_dict, self.lines)
mock_lines.assert_any_call("REMARK", self.lines)
self.assertEqual(self.pdb_dict["rfactor"], 0.193)
self.assertEqual(self.pdb_dict["rfree"], 0.229)
self.assertEqual(self.pdb_dict["rcount"], 1583)



Expand Down
8 changes: 8 additions & 0 deletions tests/unit/files_tests/test_pdb_to_pdb_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ def test_can_convert_pdb_to_pdb_dict_one_model(self, mock_seq, mock_dict):
pdb._title = "T"
pdb._resolution = 1.5
pdb._rfactor = 1.8
pdb._rfree = 2.4
pdb._rcount = 18
pdb._organism = "O"
pdb._expression_system = "E"
pdb._technique = "T"
Expand All @@ -28,6 +30,7 @@ def test_can_convert_pdb_to_pdb_dict_one_model(self, mock_seq, mock_dict):
"deposition_date": "D", "code": "C", "title": "T", "resolution": 1.5,
"organism": "O", "expression_system": "E", "technique": "T",
"classification": "CLASS", "rfactor": 1.8, "keywords": ["a", "b"],
"rfree": 2.4, "rcount": 18,
"models": ["m1"], "connections": ["c1", "c2"], "sequences": "SEQ"
})

Expand All @@ -42,6 +45,8 @@ def test_can_convert_pdb_to_pdb_dict_two_models(self, mock_seq, mock_dict):
pdb._title = "T"
pdb._resolution = 1.5
pdb._rfactor = 1.8
pdb._rfree = 2.4
pdb._rcount = 18
pdb._organism = "O"
pdb._expression_system = "E"
pdb._technique = "T"
Expand All @@ -60,6 +65,7 @@ def test_can_convert_pdb_to_pdb_dict_two_models(self, mock_seq, mock_dict):
"deposition_date": "D", "code": "C", "title": "T", "resolution": 1.5,
"organism": "O", "expression_system": "E", "technique": "T",
"classification": "CLASS", "rfactor": 1.8, "keywords": ["a", "b"],
"rfree": 2.4, "rcount": 18,
"models": ["m1", "m2"], "connections": ["c1", "c2"], "sequences": "SEQ"
})

Expand Down Expand Up @@ -92,6 +98,8 @@ def test_can_convert_model_to_pdb_dict(self, mock_con, mock_chain, mock_atom):
"title": None,
"resolution": None,
"rfactor": None,
"rfree": None,
"rcount": None,
"organism": None,
"expression_system": None,
"technique": None,
Expand Down
34 changes: 34 additions & 0 deletions tests/unit/files_tests/test_pdbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ def test_can_create_pdb(self):
self.assertEqual(pdb._technique, None)
self.assertEqual(pdb._classification, None)
self.assertEqual(pdb._rfactor, None)
self.assertEqual(pdb._rfree, None)
self.assertEqual(pdb._rcount, None)
self.assertEqual(pdb._keywords, [])


Expand Down Expand Up @@ -216,6 +218,38 @@ def test_can_update_rfactor(self):



class PdbFreeRfactorTests(TestCase):
    """Tests for the ``rfree`` property of ``Pdb``."""

    def test_can_get_pdb_rfree(self):
        # The getter must hand back the very object stored on ``_rfree``.
        pdb = Pdb()
        pdb._rfree = 1.2
        self.assertIs(pdb._rfree, pdb.rfree)


    def test_can_update_rfree(self):
        # Previously named ``test_can_update_rfactor`` although it exercises
        # ``rfree``; the name now says what is actually being tested.
        pdb = Pdb()
        pdb._rfree = 1.2
        pdb.rfree = 1.5
        self.assertEqual(pdb._rfree, 1.5)



class PdbRfactorCountTests(TestCase):
    """Tests for the ``rcount`` property of ``Pdb``."""

    def test_can_get_pdb_rcount(self):
        # Reading the property must yield the object held in ``_rcount``.
        entry = Pdb()
        entry._rcount = 1.2
        stored = entry._rcount
        exposed = entry.rcount
        self.assertIs(stored, exposed)


    def test_can_update_rcount(self):
        # Writing through the property must replace ``_rcount``.
        entry = Pdb()
        entry._rcount = 1.2
        entry.rcount = 1.5
        self.assertEqual(entry._rcount, 1.5)



class PdbTechniqueTests(TestCase):

def test_can_get_pdb_keywords(self):
Expand Down

0 comments on commit ac4a725

Please sign in to comment.