Skip to content

Commit

Permalink
adds tests for impc evidence and provenance models
Browse files Browse the repository at this point in the history
  • Loading branch information
kshefchek committed May 18, 2016
1 parent af404df commit 02a538d
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 17 deletions.
2 changes: 1 addition & 1 deletion dipper/models/Provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(self, graph):
def add_study_parts(self, study, study_parts):
    """
    Link a study to each of its parts.

    Adds one ``study has_part part`` triple to self.graph per entry in
    study_parts. The study is the whole and each entry is a component
    of it, so the predicate runs from the study to the part (has_part)
    rather than the reverse (part_of).

    :param study: identifier of the study node, used as the subject
                  of every triple
    :param study_parts: iterable of part identifiers, each used as the
                        object of one triple; None or an empty iterable
                        adds nothing
    :return: None
    """
    has_part = self.graph_utils.object_properties['has_part']
    # "or ()" lets a caller pass None when a study has no parts
    for part in study_parts or ():
        self.graph_utils.addTriple(self.graph, study, has_part, part)
    return

Expand Down
52 changes: 47 additions & 5 deletions dipper/sources/IMPC.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,24 @@ def _add_study_provenance(self, impc_map, parameter_map,
procedure_stable_id, procedure_name,
parameter_stable_id, parameter_name,
statistical_method, resource_name):
"""
:param impc_map: dict, generated from map file
see self._get_impc_mappings() docstring
:param parameter_map: dict, generated from map file
see _get_parameter_mappings() docstring
:param phenotyping_center: str, from self.files['all']
:param colony: str, from self.files['all']
:param project_fullname: str, from self.files['all']
:param pipeline_name: str, from self.files['all']
:param pipeline_stable_id: str, from self.files['all']
:param procedure_stable_id: str, from self.files['all']
:param procedure_name: str, from self.files['all']
:param parameter_stable_id: str, from self.files['all']
:param parameter_name: str, from self.files['all']
:param statistical_method: str, from self.files['all']
:param resource_name: str, from self.files['all']
:return: study bnode
"""

provenance_model = Provenance(self.graph)
graph_utils = GraphUtils(curie_map.get())
Expand Down Expand Up @@ -565,6 +583,7 @@ def _add_study_provenance(self, impc_map, parameter_map,

study_parts.append(impc_map['pipelines'][pipeline_stable_id])
study_parts.append(impc_map['statistical_method'][statistical_method])
provenance_model.add_study_parts(study_bnode, study_parts)

# Add parameter/measure statement: study measures parameter
graph_utils.addIndividualToGraph(self.graph, parameter_map[parameter_stable_id],
Expand Down Expand Up @@ -593,14 +612,27 @@ def _add_study_provenance(self, impc_map, parameter_map,
self.graph, impc_map['project'][project_fullname],
project_fullname, provenance_model.provenance_types['project'])
graph_utils.addTriple(
self.graph, study_bnode, graph_utils.object_properties['has_part'],
self.graph, study_bnode, graph_utils.object_properties['part_of'],
impc_map['project'][project_fullname])

return study_bnode

def _add_evidence(self, assoc_id, eco_id, impc_map, p_value,
percentage_change, effect_size, study_bnode,
phenotyping_center):
"""
:param assoc_id: assoc curie used to reify a
genotype to phenotype association, generated in _process_data()
:param eco_id: eco_id as curie, hardcoded in _process_data()
:param impc_map: dict, generated from map file
see self._get_impc_mappings() docstring
:param p_value: str, from self.files['all']
:param percentage_change: str, from self.files['all']
:param effect_size: str, from self.files['all']
:param study_bnode: str, study bnode generated by _add_study_provenance()
:param phenotyping_center: str, from self.files['all']
:return: str, evidence_line_bnode as curie
"""

evidence_model = Evidence(self.graph)
provenance_model = Provenance(self.graph)
Expand All @@ -609,6 +641,8 @@ def _add_evidence(self, assoc_id, eco_id, impc_map, p_value,
# Add line of evidence
evidence_line_bnode = self.make_id("{0}{1}".format(assoc_id, study_bnode), '_')
evidence_model.add_supporting_evidence(assoc_id, evidence_line_bnode)
graph_utils.addIndividualToGraph(self.graph, evidence_line_bnode, None,
eco_id)

# Add supporting measurements to line of evidence
measurements = {}
Expand Down Expand Up @@ -658,8 +692,12 @@ def _add_evidence(self, assoc_id, eco_id, impc_map, p_value,

def _get_impc_mappings(self):
"""
Opens impc mapping file and returns dict of mappings
:return: dict
Opens impc mapping file stored in self.map_files['impc_code_map']
and returns dict of mappings
This file is generated by manually curating codes and free text to
their IRI counterpart
:return: dict, where [code] = iri
"""
impc_mappings = {}
if os.path.exists(os.path.join(os.path.dirname(__file__),
Expand All @@ -675,8 +713,12 @@ def _get_impc_mappings(self):

def _get_parameter_mappings(self):
"""
Opens impc procedure map file and returns dict of mappings
:return: dict
Opens impc procedure map file stored in self.map_files['parameter_map']
and returns dict of mappings
This file is generated by running a series of scripts in
the scripts directory, see scripts/README.md
:return: dict, where [code] = iri
"""
parameter_mappings = {}
if os.path.exists(os.path.join(os.path.dirname(__file__),
Expand Down
4 changes: 1 addition & 3 deletions dipper/sources/Source.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def whoami(self):
return

@staticmethod
def make_id(long_string, prefix=None):
def make_id(long_string, prefix='MONARCH'):
"""
a method to create unique identifiers based on very long strings
currently implemented with md5
Expand All @@ -209,8 +209,6 @@ def make_id(long_string, prefix=None):
# probably not the best long-term solution
# note others available:
# sha1(), sha224(), sha256(), sha384(), and sha512()
if prefix is None:
prefix = 'MONARCH'

byte_string = long_string.encode("utf-8")

Expand Down
1 change: 1 addition & 0 deletions resources/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This directory contains source data mapped to ontologies or web resources
141 changes: 133 additions & 8 deletions tests/test_impc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
# from tests import test_general, test_source
from tests.test_source import SourceTestCase
from dipper.sources.IMPC import IMPC
# from dipper import curie_map
from rdflib.namespace import URIRef
from dipper.utils.CurieUtil import CurieUtil
from dipper import curie_map
from dipper.utils.TestUtils import TestUtils


# Configure logging for the test run with the threshold set to WARNING
logging.basicConfig(level=logging.WARNING)
# Module-level logger named after this module
logger = logging.getLogger(__name__)
Expand All @@ -33,20 +37,141 @@ def tearDown(self):
# return


class EvidenceProvenanceTestCase():
class EvidenceProvenanceTestCase(unittest.TestCase):
    """
    Functional tests for the evidence and provenance triples written by
    IMPC._add_evidence() and IMPC._add_study_provenance().

    Each test creates its own IMPC instance, runs one of those methods
    on the fields of a single test record, and checks the resulting
    graph with a SPARQL query.
    """

    def setUp(self):
        """
        Build the fixtures shared by the tests: the association and ECO
        curies, one test record, placeholder bnode curies, and the
        association IRI expected in query output.
        """
        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000059'

        # One test record; the tests pick fields out of it by position
        # (presumably the column order of the IMPC source file - confirm)
        self.test_set_1 = (
            'MGI:1920145', 'Setd5', 'WTSI', 'MEFW', 'male',
            'heterozygote', 'MGI:4432631', 'Setd5<tm1a(EUCOMM)Wtsi>',
            'targeted mutation 1a', 'Wellcome Trust Sanger Institute',
            'MGI:2159965', 'C57BL/6N', 'MGP',
            'Wellcome Trust Sanger Institute Mouse Genetics Project',
            'MGP Select Pipeline', 'MGP_001', 'MGP_XRY_001', 'X-ray',
            'IMPC_XRY_008_001', 'Number of ribs right', 'MP:0005390',
            'skeleton phenotype', 'MP:0000480', 'increased rib number',
            '1.637023E-010', '', '8.885439E-007',
            'Wilcoxon rank sum test with continuity correction', 'IMPC')

        # Generate test curies, these are otherwise generated
        # within _add_evidence() and _add_study_provenance()
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRIs for testing sparql output
        curie_dict = curie_map.get()
        curie_util = CurieUtil(curie_dict)
        self.assoc_iri = URIRef(curie_util.get_uri(self.assoc_curie))

    def test_evidence_model(self):
        """
        Functional test for _add_evidence()

        Adds evidence for the test association and expects the query to
        return exactly one row holding the association IRI.
        """
        impc = IMPC()
        impc.load_bindings()
        test_env = TestUtils(impc.graph)
        impc_map = impc._get_impc_mappings()

        # Positions 24-26: p value, percentage change, effect size
        p_value, percentage_change, effect_size = self.test_set_1[24:27]
        phenotyping_center = self.test_set_1[2]

        impc._add_evidence(self.assoc_curie, self.eco_id, impc_map, p_value,
                           percentage_change, effect_size, self.study_curie,
                           phenotyping_center)

        sparql_query = """
            SELECT ?assoc
            WHERE {
            ?assoc OBO:SEPIO_0000007 ?evidenceline .
            ?evidenceline a OBO:ECO_0000059 ;
            OBO:SEPIO_0000018 <http://www.sanger.ac.uk/> ;
            OBO:SEPIO_0000084 ?measure1 ;
            OBO:SEPIO_0000084 ?measure2 ;
            OBO:SEPIO_0000106 _:study .
            ?measure1 a OBO:OBI_0000175 ;
            OBO:RO_0002353 _:study ;
            OBO:STATO_0000129 1.637023e-10 .
            ?measure2 a OBO:STATO_0000085 ;
            OBO:RO_0002353 _:study ;
            OBO:STATO_0000129 "8.885439E-007" .
            }
            """
        sparql_output = test_env.query_graph(sparql_query)
        expected_results = [[self.assoc_iri]]
        self.assertEqual(sparql_output, expected_results)

    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()

        Adds study provenance for the test record and expects the query
        to match, binding both ?study and ?colony.
        """
        impc = IMPC()
        impc.load_bindings()
        test_env = TestUtils(impc.graph)
        impc_map = impc._get_impc_mappings()
        parameter_map = impc._get_parameter_mappings()

        phenotyping_center, colony = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[13:20]
        statistical_method, resource_name = self.test_set_1[27:29]

        impc._add_study_provenance(
            impc_map, parameter_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        sparql_query = """
            SELECT *
            WHERE {
            <https://www.mousephenotype.org/impress/procedures/15> a owl:NamedIndividual ;
            rdfs:label "MGP Select Pipeline" .
            <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
            rdfs:label "X-ray" .
            <http://www.sanger.ac.uk/> a foaf:organization ;
            rdfs:label "WTSI" .
            <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
            rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .
            <https://www.mousephenotype.org/impress/parameterontologies/1867/175> a owl:NamedIndividual ;
            rdfs:label "Number of ribs right" .
            ?study a OBO:OBI_0000471 ;
            OBO:BFO_0000051 OBO:STATO_0000076 ;
            OBO:BFO_0000051 <https://www.mousephenotype.org/impress/procedures/15> ;
            OBO:BFO_0000051 <https://www.mousephenotype.org/impress/protocol/175/15> ;
            OBO:BFO_0000051 <https://www.mousephenotype.org/impress/parameterontologies/1867/175> ;
            OBO:BFO_0000050 <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
            OBO:RO_0000057 ?colony ;
            OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> .
            ?colony a owl:NamedIndividual ;
            rdfs:label "MEFW" .
            }
            """

        sparql_output = test_env.query_graph(sparql_query)
        # Should output a single row for ?study and ?colony
        # print(sparql_output)
        # >> [[rdflib.term.BNode('f3015fe2476ce825a8c6978af6222d87'),
        #      rdflib.term.BNode('9328ff6b6455b01254a5548c3cfcc8c4')]]

        # Check for a row before indexing so that a non-matching graph is
        # reported as a test failure rather than an IndexError
        self.assertTrue(
            sparql_output,
            "provenance query returned no rows; expected ?study and ?colony")
        self.assertEqual(len(sparql_output[0]), 2)

    def tearDown(self):
        """Nothing to clean up; every test builds its own IMPC instance."""
        return

# @unittest.skip('test not yet defined')
# def test_hpotest(self):
# logger.info("An IMPC-specific test")
#
# return


# Run the tests in this module when the file is executed as a script
if __name__ == '__main__':
    unittest.main()

0 comments on commit 02a538d

Please sign in to comment.