Skip to content
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
PYTHON=python
SSSOM_VERSION_TAG=0.11.0
SSSOM_VERSION_TAG=0.12.0
DEFAULT_PREFIX_MAP="https://raw.githubusercontent.com/biopragmatics/bioregistry/main/exports/contexts/obo.context.jsonld"
SSSOM_PY="https://raw.githubusercontent.com/mapping-commons/sssom/$(SSSOM_VERSION_TAG)/src/sssom_schema/datamodel/sssom_schema.py"
SSSOM_YAML="https://raw.githubusercontent.com/mapping-commons/sssom/$(SSSOM_VERSION_TAG)/src/sssom_schema/schema/sssom_schema.yaml"
Expand Down
20 changes: 6 additions & 14 deletions sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,9 @@

URI_SSSOM_MAPPINGS = f"{SSSOM_URI_PREFIX}mappings"

#: The 3 columns whose combination would be used as primary keys while merging/grouping
KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID]
#: The 4 columns whose combination is used as the primary key while merging/grouping
KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID, PREDICATE_MODIFIER]
TRIPLE_IDS = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID]


@dataclass
Expand Down Expand Up @@ -726,7 +727,6 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:
raise ValueError(
"The dataframe, after assigning default confidence, appears empty (deal_with_negation)"
)

# If both s,!p,o and s,p,o are present, prefer the one with higher confidence and remove the other.
negation_df: pd.DataFrame
negation_df = df.loc[df[PREDICATE_MODIFIER] == PREDICATE_MODIFIER_NOT]
Expand Down Expand Up @@ -760,9 +760,9 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:

# GroupBy and SELECT ONLY maximum confidence
max_confidence_df: pd.DataFrame
max_confidence_df = combined_normalized_subset.groupby(
KEY_FEATURES, as_index=False
)[CONFIDENCE].max()
max_confidence_df = combined_normalized_subset.groupby(TRIPLE_IDS, as_index=False)[
CONFIDENCE
].max()

# If same confidence prefer "HumanCurated".
reconciled_df_subset = pd.DataFrame(columns=combined_normalized_subset.columns)
Expand Down Expand Up @@ -790,14 +790,6 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:
if len(match_condition_1[match_condition_1].index) > 1:
match_condition_1 = match_condition_1[match_condition_1].sample()

# FutureWarning: The frame.append method is deprecated and will be removed
# from pandas in a future version. Use pandas.concat instead.
# reconciled_df_subset = reconciled_df_subset.append(
# combined_normalized_subset.loc[
# match_condition_1[match_condition_1].index, :
# ],
# ignore_index=True,
# )
reconciled_df_subset = pd.concat(
[
reconciled_df_subset,
Expand Down
2 changes: 1 addition & 1 deletion tests/data/bad_basic.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# d: "http://example.org/d/"
# rdfs: "http://www.w3.org/2000/01/rdf-schema#"
# owl: "http://www.w3.org/2002/07/owl#"
# semapv: "https://w3id.org/semapv/"
# semapv: "https://w3id.org/semapv/vocab/"
subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification subject_source object_source mapping_tool confidence subject_match_field object_match_field subject_category object_category match_string comment
c:something YYYYY owl:equivalentClass b:something yyyyyy Lexical c d rdf_matcher 0.81 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data
d:something YYYYY owl:equivalentClass Not a:something yyyyyy Lexical d a rdf_matcher 0.82 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data
Expand Down
2 changes: 1 addition & 1 deletion tests/data/basic.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# c: "http://example.org/c/"
# d: "http://example.org/d/"
# orcid: "https://orcid.org/my-orcid?orcid="
# semapv: "https://w3id.org/semapv/"
# semapv: "https://w3id.org/semapv/vocab/"
subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification subject_source object_source mapping_tool confidence subject_match_field object_match_field subject_category object_category match_string comment
x:appendage appendage owl:equivalentClass y:appendage appendages semapv:ManualMappingCuration x:example y:example rdf_matcher 0.840714406 rdfs:label|skos:prefLabel rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity appendag .
x:appendage appendage owl:equivalentClass z:appendage APPENDAGE semapv:ManualMappingCuration x:example z:example rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity appendag .
Expand Down
2 changes: 1 addition & 1 deletion tests/data/basic2.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# c: "http://example.org/c/"
# d: "http://example.org/d/"
# orcid: "https://orcid.org/my-orcid?orcid="
# semapv: "https://w3id.org/semapv/"
# semapv: "https://w3id.org/semapv/vocab/"
subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification subject_source object_source mapping_tool confidence subject_match_field object_match_field subject_category object_category match_string comment
x:FOO FOO owl:equivalentClass y:FOO FOO semapv:SemanticSimilarityThresholdMatching x:example y:example rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity foo .
x:appendage appendage owl:equivalentClass y:appendage appendages semapv:ManualMappingCuration x:example y:example rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity appendag .
Expand Down
2 changes: 1 addition & 1 deletion tests/data/basic3.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# rdfs: "http://www.w3.org/2000/01/rdf-schema#"
# owl: "http://www.w3.org/2002/07/owl#"
# orcid: "https://orcid.org/my-orcid?orcid="
# semapv: "https://w3id.org/semapv/"
# semapv: "https://w3id.org/semapv/vocab/"
subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification subject_source object_source mapping_tool confidence subject_match_field object_match_field subject_category object_category match_string comment
c:something YYYYY owl:equivalentClass b:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.81 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data
d:something YYYYY owl:equivalentClass Not a:something yyyyyy semapv:LexicalMatching d:example a:example rdf_matcher 0.82 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data
Expand Down
2 changes: 1 addition & 1 deletion tests/data/basic4.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# b2: "http://example.org/b2/"
# c2: "http://example.org/c2/"
# d2: "http://example.org/d2/"
# semapv: "https://w3id.org/semapv/"
# semapv: "https://w3id.org/semapv/vocab/"
subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification subject_source object_source mapping_tool confidence subject_match_field object_match_field subject_category object_category match_string comment
x2:appendage appendage owl:equivalentClass y2:appendage appendages semapv:SemanticSimilarityThresholdMatching x y rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity appendag .
x2:appendage appendage owl:equivalentClass z2:appendage APPENDAGE semapv:SemanticSimilarityThresholdMatching x z rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity appendag .
Expand Down
2 changes: 1 addition & 1 deletion tests/data/basic5.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# b1: "http://example.org/b1/"
# c1: "http://example.org/c1/"
# d1: "http://example.org/d1/"
# semapv: "https://w3id.org/semapv/"
# semapv: "https://w3id.org/semapv/vocab/"
subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification subject_source object_source mapping_tool confidence subject_match_field object_match_field subject_category object_category match_string comment
x1:appendage appendage owl:equivalentClass y1:appendage appendages semapv:LexicalMatching x z rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity organ .
x1:appendage appendage owl:equivalentClass z1:appendage APPENDAGE semapv:LexicalMatching x y rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity region .
Expand Down
2 changes: 1 addition & 1 deletion tests/data/basic7.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# rdfs: "http://www.w3.org/2000/01/rdf-schema#"
# owl: "http://www.w3.org/2002/07/owl#"
# orcid: "https://orcid.org/my-orcid?orcid="
# semapv: "https://w3id.org/semapv/"
# semapv: "https://w3id.org/semapv/vocab/"
# skos: "http://www.w3.org/2004/02/skos/core#"
subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification subject_source object_source mapping_tool confidence subject_match_field object_match_field subject_category object_category match_string comment
a:something YYYYY owl:equivalentClass b:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data
Expand Down
2 changes: 1 addition & 1 deletion tests/data/cob-to-external.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# rdfs: "http://www.w3.org/2000/01/rdf-schema#"
# skos: "http://www.w3.org/2004/02/skos/core#"
# owl: "http://www.w3.org/2002/07/owl#"
# semapv: "https://w3id.org/semapv/"
# semapv: "https://w3id.org/semapv/vocab/"
# BFO: "http://purl.obolibrary.org/obo/BFO_"
# CARO: "http://purl.obolibrary.org/obo/CARO_"
# CHEBI: "http://purl.obolibrary.org/obo/CHEBI_"
Expand Down
2 changes: 1 addition & 1 deletion tests/data/reconcile_1.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# owl: http://www.w3.org/2002/07/owl#
# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
# rdfs: http://www.w3.org/2000/01/rdf-schema#
# semapv: https://w3id.org/semapv/
# semapv: https://w3id.org/semapv/vocab/
# skos: http://www.w3.org/2004/02/skos/core#
# sssom: https://w3id.org/sssom/
# license: https://w3id.org/sssom/license/unspecified
Expand Down
2 changes: 1 addition & 1 deletion tests/data/reconcile_2.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# owl: http://www.w3.org/2002/07/owl#
# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
# rdfs: http://www.w3.org/2000/01/rdf-schema#
# semapv: https://w3id.org/semapv/
# semapv: https://w3id.org/semapv/vocab/
# skos: http://www.w3.org/2004/02/skos/core#
# sssom: https://w3id.org/sssom/
# license: https://w3id.org/sssom/license/unspecified
Expand Down
2 changes: 1 addition & 1 deletion tests/data/test_annotate_sssom.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# owl: http://www.w3.org/2002/07/owl#
# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
# rdfs: http://www.w3.org/2000/01/rdf-schema#
# semapv: https://w3id.org/semapv/
# semapv: https://w3id.org/semapv/vocab/
# skos: http://www.w3.org/2004/02/skos/core#
# sssom: https://w3id.org/sssom/
# x: http://example.org/x/
Expand Down
2 changes: 1 addition & 1 deletion tests/data/test_clean_prefix.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# rdfs: "http://www.w3.org/2000/01/rdf-schema#"
# owl: "http://www.w3.org/2002/07/owl#"
# orcid: "https://orcid.org/my-orcid?orcid="
# semapv: "https://w3id.org/semapv/"
# semapv: "https://w3id.org/semapv/vocab/"
subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification subject_source object_source mapping_tool confidence subject_match_field object_match_field subject_category object_category match_string comment
c:something YYYYY owl:equivalentClass b:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.81 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data
d:something YYYYY owl:equivalentClass Not a:something yyyyyy semapv:LexicalMatching d:example a:example rdf_matcher 0.82 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data
Expand Down
2 changes: 1 addition & 1 deletion tests/data/test_filter_sssom.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# owl: http://www.w3.org/2002/07/owl#
# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
# rdfs: http://www.w3.org/2000/01/rdf-schema#
# semapv: https://w3id.org/semapv/
# semapv: https://w3id.org/semapv/vocab/
# skos: http://www.w3.org/2004/02/skos/core#
# sssom: https://w3id.org/sssom/
# x: http://example.org/x/
Expand Down
2 changes: 1 addition & 1 deletion tests/data/test_inject_metadata_msdf.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# owl: http://www.w3.org/2002/07/owl#
# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
# rdfs: http://www.w3.org/2000/01/rdf-schema#
# semapv: https://w3id.org/semapv/
# semapv: https://w3id.org/semapv/vocab/
# skos: http://www.w3.org/2004/02/skos/core#
# sssom: https://w3id.org/sssom/
# license: https://creativecommons.org/licenses/by-nc/4.0/
Expand Down