From 91d08b8c096fe987b8bee7155b0731a53009331f Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Tue, 14 May 2024 06:10:55 -0400 Subject: [PATCH] FHIR feature updates (#285) Partially addresses: - https://github.com/timsbiomed/issues/issues/85 ## Overview - Minor bug fixes - Dev updates (comments, etc) - Added test(s) ## Updates `7b91d57da9a5143c5ce8335897ad2542424797c6` ``` - Add: _test_to_fhir_json() ``` `157ae226921891feeaa38b4329c98b762b2b8727` ``` - Refactored 3 JSON writer functions to a state that they were probably intended to be in: 1 json function with branching logic depending on 'serialization' param. ``` `0ab8625887a1d57adeadd5ad6d3c47dd08524839` ``` - Update: Minor codestyle updates, comments, todo's - Delete: Unused schema/fhir_json.csv. Mappings will continue to be done via pure Python - Bugfix: extension.ValueString -> extension.valueString - Delete: Comments about spec field mappings ``` `0dd0772dc7bc4a263fa8e8f8e5c02c6f6c3264bf` ``` - Added: schema/fhir_json.csv: For automation of conversion by utilizing CSV export from curated GoogleSheet. 
- Update: Comments: Added state of mappings between SSSOM and FHIR ConceptMap ``` --- src/sssom/writers.py | 276 +++++++++++++++++++++------------------ tests/test_cli.py | 1 + tests/test_conversion.py | 10 ++ tests/test_writers.py | 40 +++++- 4 files changed, 194 insertions(+), 133 deletions(-) diff --git a/src/sssom/writers.py b/src/sssom/writers.py index f7077405..a26f22a2 100644 --- a/src/sssom/writers.py +++ b/src/sssom/writers.py @@ -8,6 +8,7 @@ import pandas as pd import yaml from curies import Converter +from deprecation import deprecated from jsonasobj2 import JsonObj from linkml_runtime.dumpers import JSONDumper, rdflib_dumper from linkml_runtime.utils.schemaview import SchemaView @@ -99,20 +100,50 @@ def write_rdf( print(t.decode(), file=file) -# todo: not sure the need for serialization param here; seems superfluous for some of these funcs -def write_fhir_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir") -> None: - """Write a mapping set dataframe to the file as FHIR ConceptMap JSON.""" - data = to_fhir_json(msdf) +def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") -> None: + """Write a mapping set dataframe to the file as JSON. + + :param serialisation: The JSON format to use. Supported formats are: + - fhir_json: Outputs JSON in FHIR ConceptMap format (https://fhir-ru.github.io/conceptmap.html) + https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_fhir_json + - json: Outputs to SSSOM JSON https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_json + - ontoportal_json: Outputs JSON in Ontoportal format (https://ontoportal.org/) + https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_ontoportal_json + """ + func_map: Dict[str, Callable] = { + "fhir_json": to_fhir_json, + "json": to_json, + "ontoportal_json": to_ontoportal_json, + } + if serialisation not in func_map: + raise ValueError( + f"Unknown JSON format: {serialisation}. 
Supported flavors: {', '.join(func_map.keys())}" + ) + func: Callable = func_map[serialisation] + data = func(msdf) json.dump(data, output, indent=2) -def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") -> None: - """Write a mapping set dataframe to the file as JSON.""" - if serialisation == "json": - data = to_json(msdf) - json.dump(data, output, indent=2) - else: - raise ValueError(f"Unknown json format: {serialisation}, currently only json supported") +@deprecated(deprecated_in="0.4.7", details="Use write_json() instead") +def write_fhir_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir_json") -> None: + """Write a mapping set dataframe to the file as FHIR ConceptMap JSON.""" + if serialisation != "fhir_json": + raise ValueError( + f"Unknown json format: {serialisation}, currently only fhir_json supported" + ) + write_json(msdf, output, serialisation="fhir_json") + + +@deprecated(deprecated_in="0.4.7", details="Use write_json() instead") +def write_ontoportal_json( + msdf: MappingSetDataFrame, output: TextIO, serialisation: str = "ontoportal_json" +) -> None: + """Write a mapping set dataframe to the file as the ontoportal mapping JSON model.""" + if serialisation != "ontoportal_json": + raise ValueError( + f"Unknown json format: {serialisation}, currently only ontoportal_json supported" + ) + write_json(msdf, output, serialisation="ontoportal_json") def write_owl( @@ -133,18 +164,6 @@ def write_owl( print(t.decode(), file=file) -def write_ontoportal_json( - msdf: MappingSetDataFrame, output: TextIO, serialisation: str = "ontoportal_json" -) -> None: - """Write a mapping set dataframe to the file as the ontoportal mapping JSON model.""" - if serialisation != "ontoportal_json": - raise ValueError( - f"Unknown json format: {serialisation}, currently only ontoportal_json supported" - ) - data = to_ontoportal_json(msdf) - json.dump(data, output, indent=2) - - # Converters # Converters convert a mappingsetdataframe to an 
object of the supportes types (json, pandas dataframe) @@ -271,29 +290,77 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict: :return: Dict: A Dictionary serializable as JSON. Resources: - - ConcpetMap::SSSOM mapping spreadsheet: https://docs.google.com/spreadsheets/d/1J19foBAYO8PCHwOfksaIGjNu-q5ILUKFh2HpOCgYle0/edit#gid=1389897118 + - ConceptMap::SSSOM mapping spreadsheet: + https://docs.google.com/spreadsheets/d/1J19foBAYO8PCHwOfksaIGjNu-q5ILUKFh2HpOCgYle0/edit#gid=1389897118 - TODOs + TODO: add to CLI & to these functions: r4 vs r5 param + TODO: What if the msdf doesn't have everything we need? (i) metadata, e.g. yml, (ii) what if we need to override? + - todo: later: allow any nested arbitrary override: (get in kwargs, else metadata.get(key, None)) + + Minor todos + todo: mapping_justification: consider `ValueString` -> `ValueCoding` https://github.com/timsbiomed/issues/issues/152 todo: when/how to conform to R5 instead of R4?: https://build.fhir.org/conceptmap.html - TODO: Add additional fields from both specs - - ConceptMap spec fields: https://www.hl7.org/fhir/r4/conceptmap.html - - Joe: Can also utilize: /Users/joeflack4/projects/hapi-fhir-jpaserver-starter/_archive/issues/sssom/example_json/minimal.json - - SSSOM more fields: - - prefix_map - - SSSOM spec fields: https://mapping-commons.github.io/sssom/Mapping/ """ + # Constants df: pd.DataFrame = msdf.df + # TODO: R4 (try this first) + # relatedto | equivalent | equal | wider | subsumes | narrower | specializes | inexact | unmatched | disjoint + # https://www.hl7.org/fhir/r4/conceptmap.html + # todo: r4: if not found, should likely be `null` or something. check docs to see if nullable, else ask on Zulip + # TODO: R5 Needs to be one of: + # related-to | equivalent | source-is-narrower-than-target | source-is-broader-than-target | not-related-to + # https://www.hl7.org/fhir/r4/valueset-concept-map-equivalence.html + # ill update that next time. 
I can map SSSOM SKOS/etc mappings to FHIR ones + # and then add the original SSSOM mapping CURIE fields somewhere else + # https://www.hl7.org/fhir/valueset-concept-map-equivalence.html + # https://github.com/mapping-commons/sssom-py/issues/258 + equivalence_map = { + # relatedto: The concepts are related to each other, and have at least some overlap in meaning, but the exact + # relationship is not known. + "skos:related": "relatedto", + "skos:relatedMatch": "relatedto", # canonical + # equivalent: The definitions of the concepts mean the same thing (including when structural implications of + # meaning are considered) (i.e. extensionally identical). + "skos:exactMatch": "equivalent", + # equal: The definitions of the concepts are exactly the same (i.e. only grammatical differences) and structural + # implications of meaning are identical or irrelevant (i.e. intentionally identical). + "equal": "equal", # todo what's difference between this and above? which to use? + # wider: The target mapping is wider in meaning than the source concept. + "skos:broader": "wider", + "skos:broadMatch": "wider", # canonical + # subsumes: The target mapping subsumes the meaning of the source concept (e.g. the source is-a target). + "rdfs:subClassOf": "subsumes", + # narrower: The target mapping is narrower in meaning than the source concept. The sense in which the mapping is + # narrower SHALL be described in the comments in this case, and applications should be careful when attempting + # to use these mappings operationally. + "skos:narrower": "narrower", + "skos:narrowMatch": "narrower", # canonical + # specializes: The target mapping specializes the meaning of the source concept (e.g. the target is-a source). + "sssom:superClassOf": "specializes", + # inexact: The target mapping overlaps with the source concept, but both source and target cover additional + # meaning, or the definitions are imprecise and it is uncertain whether they have the same boundaries to their + # meaning. 
The sense in which the mapping is inexact SHALL be described in the comments in this case, and + # applications should be careful when attempting to use these mappings operationally + "skos:closeMatch": "inexact", + # unmatched: There is no match for this concept in the target code system. + # todo: this is more complicated. This will be a combination of predicate_id and predicate_modifier (if + # present). See: https://github.com/mapping-commons/sssom/issues/185 + "unmatched": "unmatched", + # disjoint: This is an explicit assertion that there is no mapping between the + # source and target concept. + "owl:disjointWith": "disjoint", + } + # Intermediary variables metadata: Dict[str, Any] = msdf.metadata mapping_set_id = metadata.get("mapping_set_id", "") name: str = mapping_set_id.split("/")[-1].replace(".sssom.tsv", "") + # Construct JSON - # TODO: Fix: sssom/writers.py:293: error: Item "None" of "Optional[Dict[str, Any]]" has no attribute "get" - # ...a. Maybe remove the typing? b. remove the get? c. do outside of dict and add after?, d. Add "None"? maybe cant be done here - # ...e. Probably assign metadata to new object and use that instead. so won't read as None - json_obj = { + json_obj: Dict[str, Any] = { "resourceType": "ConceptMap", "url": mapping_set_id, + # Assumes mapping_set_id is a URI w/ artefact name at end. System becomes URI stem, value becomes artefact name "identifier": [ { "system": "/".join(mapping_set_id.split("/")[:-1]) + "/", @@ -302,7 +369,6 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict: ], "version": metadata.get("mapping_set_version", ""), "name": name, - "title": name, "status": "draft", # todo: when done: draft | active | retired | unknown "experimental": True, # todo: False when converter finished # todo: should this be date of last converted to FHIR json instead? 
@@ -329,95 +395,11 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict: # }], # "purpose": "", # todo: conceptmap "copyright": metadata.get("license", ""), - "sourceUri": metadata.get("subject_source", ""), # todo: correct? - "targetUri": metadata.get("object_source", ""), # todo: correct? + # TODO: Might want to make each "group" first, if there is more than 1 set of ontology1::ontology2 + # ...within a given MappingSet / set of SSSOM TSV rows. "group": [ { - "source": metadata.get("subject_source", ""), # todo: correct? - "target": metadata.get("object_source", ""), # todo: correct? - "element": [ - { - "code": row["subject_id"], - "display": row.get("subject_label", ""), - "target": [ - { - "code": row["object_id"], - "display": row.get("object_label", ""), - # TODO: R4 (try this first) - # relatedto | equivalent | equal | wider | subsumes | narrower | specializes | inexact | unmatched | disjoint - # https://www.hl7.org/fhir/r4/conceptmap.html - # todo: r4: if not found, eventually needs to be `null` or something. check docs to see if nullable, else ask on Zulip - # TODO: R5 Needs to be one of: - # related-to | equivalent | source-is-narrower-than-target | source-is-broader-than-target | not-related-to - # https://www.hl7.org/fhir/r4/valueset-concept-map-equivalence.html - # ill update that next time. i can map SSSOM SKOS/etc mappings to FHIR ones - # and then add the original SSSOM mapping CURIE fields somewhere else - # https://www.hl7.org/fhir/valueset-concept-map-equivalence.html - # https://github.com/mapping-commons/sssom-py/issues/258 - "equivalence": { - # relateedto: The concepts are related to each other, and have at least some overlap - # in meaning, but the exact relationship is not known. - "skos:related": "relatedto", - "skos:relatedMatch": "relatedto", # canonical - # equivalent: The definitions of the concepts mean the same thing (including when - # structural implications of meaning are considered) (i.e. extensionally identical). 
- "skos:exactMatch": "equivalent", - # equal: The definitions of the concepts are exactly the same (i.e. only grammatical - # differences) and structural implications of meaning are identical or irrelevant - # (i.e. intentionally identical). - "equal": "equal", # todo what's difference between this and above? which to use? - # wider: The target mapping is wider in meaning than the source concept. - "skos:broader": "wider", - "skos:broadMatch": "wider", # canonical - # subsumes: The target mapping subsumes the meaning of the source concept (e.g. the - # source is-a target). - "rdfs:subClassOf": "subsumes", - "owl:subClassOf": "subsumes", - # narrower: The target mapping is narrower in meaning than the source concept. The - # sense in which the mapping is narrower SHALL be described in the comments in this - # case, and applications should be careful when attempting to use these mappings - # operationally. - "skos:narrower": "narrower", - "skos:narrowMatch": "narrower", # canonical - # specializes: The target mapping specializes the meaning of the source concept - # (e.g. the target is-a source). - "sssom:superClassOf": "specializes", - # inexact: The target mapping overlaps with the source concept, but both source and - # target cover additional meaning, or the definitions are imprecise and it is - # uncertain whether they have the same boundaries to their meaning. The sense in - # which the mapping is inexact SHALL be described in the comments in this case, and - # applications should be careful when attempting to use these mappings operationally - "skos:closeMatch": "inexact", - # unmatched: There is no match for this concept in the target code system. - # todo: unmatched: this is more complicated. This will be a combination of - # predicate_id and predicate_modifier (if present). 
See: - # https://github.com/mapping-commons/sssom/issues/185 - "unmatched": "unmatched", - # disjoint: This is an explicit assertion that there is no mapping between the - # source and target concept. - "owl:disjointWith": "disjoint", - }.get( - row["predicate_id"], row["predicate_id"] - ), # r4 - # "relationship": row['predicate_id'], # r5 - # "comment": '', - "extension": [ - { - # todo: `mapping_justification` consider changing `ValueString` -> `ValueCoding` - # ...that is, if I happen to know the categories/codes for this categorical variable - # ...if i do that, do i also need to upload that coding as a (i) `ValueSet` resource? (or (ii) codeable concept? prolly (i)) - "url": "http://example.org/fhir/StructureDefinition/mapping_justification", - "ValueString": row.get( - "mapping_justification", - row.get("mapping_justification", ""), - ), - } - ], - } - ], - } - for i, row in df.iterrows() - ], + "element": [] # "unmapped": { # todo: conceptmap # "mode": "fixed", # "code": "temp", @@ -426,9 +408,49 @@ def to_fhir_json(msdf: MappingSetDataFrame) -> Dict: } ], } + if "mapping_set_title" in metadata: + json_obj["title"] = metadata["mapping_set_title"] + + # todo: Override? but how? 
(2024/04/05 Joe: idr what I was trying to override) + if "subject_source" in metadata: + json_obj["sourceUri"] = metadata["subject_source"] + json_obj["group"][0]["source"] = metadata["subject_source"] + if "object_source" in metadata: + json_obj["targetUri"] = metadata["object_source"] + json_obj["group"][0]["target"] = metadata["object_source"] + + for _i, row in df.iterrows(): + entry = { + "code": row["subject_id"], + "display": row.get("subject_label", ""), # todo: if empty, don't add this key + "target": [ + { + "code": row["object_id"], + "display": row.get("object_label", ""), # todo: if empty, don't add this key + "equivalence": equivalence_map.get( + row["predicate_id"], row["predicate_id"] + ), # r4 + # "relationship": row['predicate_id'], # r5 + # "comment": '', + "extension": [ + { + "url": "http://example.org/fhir/StructureDefinition/mapping_justification", + "valueString": row.get( + "mapping_justification", + row.get( + "mapping_justification", "" + ), # todo: if empty, don't add this key + ), + } + ], + } + ], + } + json_obj["group"][0]["element"].append(entry) # Delete empty fields - # todo: This should be recursive? + # todo: This should be recursive? 
yes + # - it catches empty 'sourceUri' and 'targetUri', but not 'source' and 'target' keys_to_delete: List[str] = [] for k, v in json_obj.items(): if v in [ @@ -503,9 +525,9 @@ def to_ontoportal_json(msdf: MappingSetDataFrame) -> List[Dict]: WRITER_FUNCTIONS: Dict[str, Tuple[Callable, Optional[str]]] = { "tsv": (write_table, None), "owl": (write_owl, SSSOM_DEFAULT_RDF_SERIALISATION), - "ontoportal_json": (write_ontoportal_json, None), - "fhir_json": (write_fhir_json, None), - "json": (write_json, None), + "ontoportal_json": (write_json, "ontoportal_json"), + "fhir_json": (write_json, "fhir_json"), + "json": (write_json, "json"), "rdf": (write_rdf, SSSOM_DEFAULT_RDF_SERIALISATION), } for rdf_format in RDF_FORMATS: diff --git a/tests/test_cli.py b/tests/test_cli.py index f6690af5..29e462e3 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -53,6 +53,7 @@ def test_cli_single_input(self): # These test only run on TSV inputs self.run_convert(runner, test) self.run_convert(runner, test, "ontoportal_json") + self.run_convert(runner, test, "fhir_json") self.run_validate(runner, test) self.run_parse(runner, test) diff --git a/tests/test_conversion.py b/tests/test_conversion.py index faeae644..c4feccd1 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -15,6 +15,7 @@ from sssom.sssom_document import MappingSetDocument from sssom.util import MappingSetDataFrame, to_mapping_set_dataframe from sssom.writers import ( + to_fhir_json, to_json, to_ontoportal_json, to_owl_graph, @@ -62,6 +63,8 @@ def test_conversion(self): self._test_to_json(mdoc, test) logging.info("Testing ontoportal JSON export") self._test_to_ontoportal_json(mdoc, test) + logging.info("Testing fhir_json JSON export") + self._test_to_fhir_json(mdoc, test) def _test_to_owl_graph(self, mdoc, test): msdf = to_mapping_set_dataframe(mdoc) @@ -85,6 +88,13 @@ def _test_to_json(self, mdoc, test: SSSOMTestCase): with open(test.get_out_file("json"), "w") as file: write_json(msdf, file, 
serialisation="json") + def _test_to_fhir_json(self, mdoc, test: SSSOMTestCase): + msdf = to_mapping_set_dataframe(mdoc) + d = to_fhir_json(msdf) + self.assertEqual( + len(d["group"][0]["element"]), test.ct_data_frame_rows, "wrong number of mappings." + ) + def _test_to_ontoportal_json(self, mdoc, test: SSSOMTestCase): msdf = to_mapping_set_dataframe(mdoc) jsonob = to_ontoportal_json(msdf) diff --git a/tests/test_writers.py b/tests/test_writers.py index 82cf3e20..ad27e565 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -3,6 +3,7 @@ import json import os import unittest +from typing import Any, Dict import pandas as pd from curies import Converter @@ -21,9 +22,7 @@ from sssom.writers import ( _update_sssom_context_with_prefixmap, to_json, - write_fhir_json, write_json, - write_ontoportal_json, write_owl, write_rdf, write_table, @@ -130,18 +129,47 @@ def test_update_sssom_context_with_prefixmap(self): def test_write_sssom_fhir(self): """Test writing as FHIR ConceptMap JSON.""" + # Vars path = os.path.join(test_out_dir, "test_write_sssom_fhir.json") + msdf: MappingSetDataFrame = self.msdf + metadata: Dict[str, Any] = msdf.metadata + mapping_set_id: str = metadata["mapping_set_id"] + + # Write with open(path, "w") as file: - write_fhir_json(self.msdf, file) - # todo: @Joe: after implementing reader/importer, change this to `msdf = parse_sssom_fhir_json()` + write_json(self.msdf, file, "fhir_json") + # Read + # todo: after implementing reader/importer, change this to `msdf = parse_sssom_fhir_json()` with open(path, "r") as file: d = json.load(file) - # todo: @Joe: What else is worth checking? 
+ # Test + # - metadata + self.assertEqual(d["resourceType"], "ConceptMap") + self.assertIn(d["identifier"][0]["system"], mapping_set_id) + self.assertEqual(len(d["identifier"]), 1) + self.assertEqual( + len({d["identifier"][0]["value"], mapping_set_id, d["url"]}), 1 + ) # assert all same + # todo: if/when more test cases, shan't be just 'basic.tsv' + self.assertEqual(d["name"], "basic.tsv") + # self.assertEqual(d["title"], "todo") # missing from basic.tsv + self.assertEqual(d["status"], "draft") + self.assertEqual(d["experimental"], True) + self.assertEqual(len(d["date"]), len("YYYY-MM-DD")) + self.assertEqual(d["copyright"], "https://creativecommons.org/publicdomain/zero/1.0/") + # - n mappings self.assertEqual( len(d["group"][0]["element"]), self.mapping_count, f"{path} has the wrong number of mappings.", ) + # - more + self.assertEqual(len(d["group"]), 1) + # todo: code + # todo: display + # todo: equivalence + # - I'm getting: subsumes, owl:equivalentClass (and see what else in basic.tsv) + # todo: mapping_justification extensionprint() # TODO: temp def test_write_sssom_owl(self): """Test writing as OWL.""" @@ -153,7 +181,7 @@ def test_write_sssom_ontoportal_json(self): """Test writing as ontoportal JSON.""" path = os.path.join(test_out_dir, "test_write_sssom_ontoportal_json.json") with open(path, "w") as file: - write_ontoportal_json(self.msdf, file) + write_json(self.msdf, file, "ontoportal_json") with open(path, "r") as file: d = json.load(file)