diff --git a/sssom/cli.py b/sssom/cli.py index 0f9946df..8155b49d 100644 --- a/sssom/cli.py +++ b/sssom/cli.py @@ -168,7 +168,7 @@ def convert(input: str, output: TextIO, output_format: str): ) @click.option( "-E", - "--embedded-mode", + "--embedded-mode / --non-embedded-mode", default=True, is_flag=True, help="If False, the resultant SSSOM file will be saved\ @@ -484,7 +484,7 @@ def correlations(input: str, output: TextIO, transpose: bool, fields: Tuple): help="Boolean indicating the need for reconciliation of the SSSOM tsv file.", ) @output_option -def merge(inputs: str, output: TextIO, reconcile: bool = True): +def merge(inputs: str, output: TextIO, reconcile: bool = False): """Merge multiple MappingSetDataFrames into one . if reconcile=True, then dedupe(remove redundant lower confidence mappings) and diff --git a/sssom/cliques.py b/sssom/cliques.py index 941eda95..e3907474 100644 --- a/sssom/cliques.py +++ b/sssom/cliques.py @@ -11,6 +11,17 @@ # from .sssom_datamodel import Mapping from sssom_schema import Mapping +from sssom.constants import ( + OWL_DIFFERENT_FROM, + OWL_EQUIVALENT_CLASS, + RDFS_SUBCLASS_OF, + SKOS_BROAD_MATCH, + SKOS_CLOSE_MATCH, + SKOS_EXACT_MATCH, + SKOS_NARROW_MATCH, + SSSOM_SUPERCLASS_OF, +) + from .parsers import to_mapping_set_document from .sssom_document import MappingSetDocument from .util import MappingSetDataFrame @@ -31,24 +42,22 @@ def to_digraph(msdf: MappingSetDataFrame) -> nx.DiGraph: pi = None - if p == "owl:equivalentClass": + if p == OWL_EQUIVALENT_CLASS: pi = 2 - elif p == "skos:exactMatch": + elif p == SKOS_EXACT_MATCH: pi = 2 - elif p == "skos:closeMatch": + elif p == SKOS_CLOSE_MATCH: # TODO: consider distributing pi = 2 - elif p == "owl:subClassOf": + elif p == RDFS_SUBCLASS_OF: pi = 0 - elif p == "skos:broadMatch": + elif p == SKOS_BROAD_MATCH: pi = 0 - elif p == "inverseOf(owl:subClassOf)": + elif p == SSSOM_SUPERCLASS_OF: pi = 1 - elif p == "skos:narrowMatch": + elif p == SKOS_NARROW_MATCH: pi = 1 - elif p == 
"owl:differentFrom": - pi = 3 - elif p == "dbpedia-owl:different": + elif p == OWL_DIFFERENT_FROM: pi = 3 if pi == 0: diff --git a/sssom/constants.py b/sssom/constants.py index dda77165..0cec65a2 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -118,6 +118,38 @@ SUBJECT_SOURCE_ID = "subject_source_id" OBJECT_SOURCE_ID = "object_source_id" +# PREDICATES +OWL_EQUIVALENT_CLASS = "owl:equivalentClass" +OWL_EQUIVALENT_PROPERTY = "owl:equivalentProperty" +OWL_DIFFERENT_FROM = "owl:differentFrom" +RDFS_SUBCLASS_OF = "rdfs:subClassOf" +RDFS_SUBPROPERTY_OF = "rdfs:subPropertyOf" +OWL_SAME_AS = "owl:sameAs" +SKOS_EXACT_MATCH = "skos:exactMatch" +SKOS_CLOSE_MATCH = "skos:closeMatch" +SKOS_BROAD_MATCH = "skos:broadMatch" +SKOS_NARROW_MATCH = "skos:narrowMatch" +OBO_HAS_DB_XREF = "oboInOwl:hasDbXref" +SKOS_RELATED_MATCH = "skos:relatedMatch" +RDF_SEE_ALSO = "rdfs:seeAlso" +SSSOM_SUPERCLASS_OF = "inverseOf(owl:subClassOf)" + +PREDICATE_LIST = [ + OWL_EQUIVALENT_CLASS, + OWL_EQUIVALENT_PROPERTY, + RDFS_SUBCLASS_OF, + SSSOM_SUPERCLASS_OF, + RDFS_SUBPROPERTY_OF, + OWL_SAME_AS, + SKOS_EXACT_MATCH, + SKOS_CLOSE_MATCH, + SKOS_BROAD_MATCH, + SKOS_NARROW_MATCH, + OBO_HAS_DB_XREF, + SKOS_RELATED_MATCH, + RDF_SEE_ALSO, +] + class SEMAPV(Enum): """SEMAPV Enum containing different mapping_justification.""" diff --git a/sssom/util.py b/sssom/util.py index f56ddfd1..dc024455 100644 --- a/sssom/util.py +++ b/sssom/util.py @@ -50,13 +50,24 @@ OBJECT_ID, OBJECT_LABEL, OBJECT_SOURCE, + OBO_HAS_DB_XREF, + OWL_DIFFERENT_FROM, + OWL_EQUIVALENT_CLASS, PREDICATE_ID, + PREDICATE_LIST, PREDICATE_MODIFIER, PREDICATE_MODIFIER_NOT, PREFIX_MAP_MODES, + RDFS_SUBCLASS_OF, SCHEMA_DICT, SCHEMA_YAML, SEMAPV, + SKOS_BROAD_MATCH, + SKOS_CLOSE_MATCH, + SKOS_EXACT_MATCH, + SKOS_NARROW_MATCH, + SKOS_RELATED_MATCH, + SSSOM_SUPERCLASS_OF, SUBJECT_CATEGORY, SUBJECT_ID, SUBJECT_LABEL, @@ -289,13 +300,83 @@ def filter_redundant_rows( # will be removed from pandas in a future version. 
# Use pandas.concat instead. # return_df = df.append(nan_df).drop_duplicates() - return_df = pd.concat([df, nan_df]).drop_duplicates() + confidence_reconciled_df = pd.concat([df, nan_df]).drop_duplicates() + + # Reconciling dataframe rows based on the predicates with equal confidence. + if PREDICATE_MODIFIER in confidence_reconciled_df.columns: + tmp_df = confidence_reconciled_df[ + [SUBJECT_ID, OBJECT_ID, PREDICATE_ID, CONFIDENCE, PREDICATE_MODIFIER] + ] + tmp_df = tmp_df[tmp_df[PREDICATE_MODIFIER] != PREDICATE_MODIFIER_NOT].drop( + PREDICATE_MODIFIER, axis=1 + ) + else: + tmp_df = confidence_reconciled_df[ + [SUBJECT_ID, OBJECT_ID, PREDICATE_ID, CONFIDENCE] + ] + tmp_df_grp = tmp_df.groupby( + [SUBJECT_ID, OBJECT_ID, CONFIDENCE], as_index=False + ).count() + tmp_df_grp = tmp_df_grp[tmp_df_grp[PREDICATE_ID] > 1].drop(PREDICATE_ID, axis=1) + non_predicate_reconciled_df = ( + confidence_reconciled_df.merge( + tmp_df_grp, on=list(tmp_df_grp.columns), how="left", indicator=True + ) + .query('_merge == "left_only"') + .drop(columns="_merge") + ) + + multiple_predicate_df = ( + confidence_reconciled_df.merge( + tmp_df_grp, on=list(tmp_df_grp.columns), how="right", indicator=True + ) + .query('_merge == "both"') + .drop(columns="_merge") + ) + + return_df = non_predicate_reconciled_df + for _, row in tmp_df_grp.iterrows(): + logic_df = multiple_predicate_df[list(tmp_df_grp.columns)] == row + concerned_row_index = ( + logic_df[logic_df[list(tmp_df_grp.columns)]].dropna().index + ) + concerned_df = multiple_predicate_df.iloc[concerned_row_index] + # Go down the hierarchical list of PREDICATE_LIST and grab the first match + return_df = pd.concat( + [get_row_based_on_hierarchy(concerned_df), return_df], axis=0 + ).drop_duplicates() if return_df[CONFIDENCE].isnull().all(): return_df = return_df.drop(columns=[CONFIDENCE], axis=1) return return_df +def get_row_based_on_hierarchy(df: pd.DataFrame): + """Get row based on hierarchy of predicates. 
+ + The hierarchy is as follows: + # owl:equivalentClass + # owl:equivalentProperty + # rdfs:subClassOf + # inverseOf(owl:subClassOf) + # rdfs:subPropertyOf + # owl:sameAs + # skos:exactMatch + # skos:closeMatch + # skos:broadMatch + # skos:narrowMatch + # oboInOwl:hasDbXref + # skos:relatedMatch + # rdfs:seeAlso + + :param df: Dataframe containing multiple predicates for same subject and object. + :return: Rows with the top-ranked predicate; df unchanged if none is ranked. + """ + top = next((p for p in PREDICATE_LIST if (df[PREDICATE_ID] == p).any()), None) + # Unranked predicates only: keep df, because pd.concat silently drops None. + return df if top is None else df[df[PREDICATE_ID] == top] + + def assign_default_confidence( df: pd.DataFrame, ) -> Tuple[pd.DataFrame, pd.DataFrame]: @@ -430,29 +511,27 @@ def dataframe_to_ptable(df: pd.DataFrame, *, inverse_factor: float = None): residual_confidence = (1 - (confidence + inverse_confidence)) / 2.0 predicate = row[PREDICATE_ID] - if predicate == "owl:equivalentClass": + if predicate == OWL_EQUIVALENT_CLASS: predicate_type = PREDICATE_EQUIVALENT - elif predicate == "skos:exactMatch": + elif predicate == SKOS_EXACT_MATCH: predicate_type = PREDICATE_EQUIVALENT - elif predicate == "skos:closeMatch": + elif predicate == SKOS_CLOSE_MATCH: # TODO: consider distributing predicate_type = PREDICATE_EQUIVALENT - elif predicate == "owl:subClassOf": + elif predicate == RDFS_SUBCLASS_OF: predicate_type = PREDICATE_SUBCLASS - elif predicate == "skos:broadMatch": + elif predicate == SKOS_BROAD_MATCH: predicate_type = PREDICATE_SUBCLASS - elif predicate == "inverseOf(owl:subClassOf)": + elif predicate == SSSOM_SUPERCLASS_OF: predicate_type = PREDICATE_SUPERCLASS - elif predicate == "skos:narrowMatch": + elif predicate == SKOS_NARROW_MATCH: predicate_type = PREDICATE_SUPERCLASS - elif predicate == "owl:differentFrom": - predicate_type = PREDICATE_SIBLING - elif predicate == "dbpedia-owl:different": + elif predicate == OWL_DIFFERENT_FROM: predicate_type = PREDICATE_SIBLING # * Added by H2 ############################ - elif predicate == 
"oboInOwl:hasDbXref": + elif predicate == OBO_HAS_DB_XREF: predicate_type = PREDICATE_HAS_DBXREF - elif predicate == "skos:relatedMatch": + elif predicate == SKOS_RELATED_MATCH: predicate_type = PREDICATE_RELATED_MATCH # * ######################################## else: @@ -538,7 +617,7 @@ def sha256sum(path: str) -> str: def merge_msdf( *msdfs: MappingSetDataFrame, - reconcile: bool = True, + reconcile: bool = False, ) -> MappingSetDataFrame: """Merge multiple MappingSetDataFrames into one. @@ -573,7 +652,10 @@ def merge_msdf( merged_msdf.df = df_merged if reconcile: merged_msdf.df = filter_redundant_rows(merged_msdf.df) - if PREDICATE_MODIFIER in merged_msdf.df.columns: + # `in` on a Series tests index labels, so compare the values instead. + if PREDICATE_MODIFIER in merged_msdf.df.columns and ( + merged_msdf.df[PREDICATE_MODIFIER] == PREDICATE_MODIFIER_NOT + ).any(): merged_msdf.df = deal_with_negation(merged_msdf.df) # deals with negation # TODO: Add default values for license and mapping_set_id. diff --git a/tests/data/basic.tsv b/tests/data/basic.tsv index cca3876e..e5ba171d 100644 --- a/tests/data/basic.tsv +++ b/tests/data/basic.tsv @@ -154,7 +154,7 @@ z:region REGION owl:equivalentClass y:region regions semapv:ManualMappingCurati z:tissue TISSUE owl:equivalentClass x:tissue tissue semapv:ManualMappingCuration z:example x:example rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity tissu . z:tissue TISSUE owl:equivalentClass x:tissue tissue semapv:LexicalMatching z:example x:example rdf_matcher 0.881856236 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity tissue . z:tissue TISSUE owl:equivalentClass y:tissue tissues semapv:ManualMappingCuration z:example y:example rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity tissu . 
-a:something XXXXX owl:subClassOf b:something xxxxxx semapv:LexicalMatching a:example b:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +a:something XXXXX rdfs:subClassOf b:something xxxxxx semapv:LexicalMatching a:example b:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data c:something YYYYY owl:equivalentClass d:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.81 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data d:something YYYYY owl:equivalentClass Not a:something yyyyyy semapv:LexicalMatching d:example a:example rdf_matcher 0.82 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data a:something XYXYX owl:equivalentClass c:something xyxyxy semapv:LexicalMatching a:example c:example rdf_matcher 0.83 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data diff --git a/tests/data/basic7.tsv b/tests/data/basic7.tsv new file mode 100644 index 00000000..77f92dbb --- /dev/null +++ b/tests/data/basic7.tsv @@ -0,0 +1,50 @@ +#license: "https://creativecommons.org/publicdomain/zero/1.0/" +#mapping_set_id: https://w3id.org/sssom/mapping/tests/data/basic3.tsv +#mapping_tool: "https://github.com/cmungall/rdf_matcher" +#creator_id: +# - "orcid:1234" +#mapping_date: "2020-05-30" +#curie_map: +# a: "http://example.org/a/" +# b: "http://example.org/b/" +# c: "http://example.org/c/" +# d: "http://example.org/d/" +# rdfs: "http://www.w3.org/2000/01/rdf-schema#" +# owl: "http://www.w3.org/2002/07/owl#" +# orcid: "https://orcid.org/my-orcid?orcid=" +# semapv: "https://w3id.org/semapv/" +# skos: "http://www.w3.org/2004/02/skos/core#" +subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification subject_source object_source mapping_tool confidence subject_match_field object_match_field 
subject_category object_category match_string comment +a:something YYYYY owl:equivalentClass b:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +a:something YYYYY owl:equivalentProperty b:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +b:something XXXXX owl:equivalentProperty c:something xxxxxx semapv:LexicalMatching b:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +b:something XXXXX rdfs:subClassOf c:something xxxxxx semapv:LexicalMatching b:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +c:something YXYXY rdfs:subClassOf d:something yxyxyx semapv:ManualMappingCuration c:example b:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +c:something YXYXY rdfs:subPropertyOf d:something yxyxyx semapv:ManualMappingCuration c:example b:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +d:something YYYYY rdfs:subPropertyOf c:something yyyyyy semapv:LexicalMatching d:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +d:something YYYYY owl:sameAs c:something yyyyyy semapv:LexicalMatching d:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +c:something YXYXY owl:sameAs b:something yxyxyx semapv:ManualMappingCuration c:example b:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +c:something YXYXY skos:exactMatch b:something yxyxyx semapv:ManualMappingCuration c:example b:example 
rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +b:something XXXXX skos:exactMatch a:something xxxxxx semapv:LexicalMatching b:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +b:something XXXXX skos:closeMatch a:something xxxxxx semapv:LexicalMatching b:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +a:something YYYYY skos:closeMatch c:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +a:something YYYYY skos:broadMatch c:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +b:something XXXXX skos:broadMatch d:something xxxxxx semapv:LexicalMatching b:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +b:something XXXXX skos:narrowMatch d:something xxxxxx semapv:LexicalMatching b:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +c:something YXYXY skos:narrowMatch a:something yxyxyx semapv:ManualMappingCuration c:example b:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +c:something YXYXY oboInOwl:hasDbXref a:something yxyxyx semapv:ManualMappingCuration c:example b:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +d:something YYYYY oboInOwl:hasDbXref b:something yyyyyy semapv:LexicalMatching d:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +d:something YYYYY skos:relatedMatch b:something yyyyyy 
semapv:LexicalMatching d:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +a:something YYYYY skos:relatedMatch d:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +a:something YYYYY rdfs:seeAlso d:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +d:something YYYYY rdfs:seeAlso a:something yyyyyy semapv:LexicalMatching d:example a:example rdf_matcher 0.82 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +d:something YYYYY owl:equivalentClass Not a:something yyyyyy semapv:LexicalMatching d:example a:example rdf_matcher 0.82 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +a:something XYXYX owl:equivalentClass c:something xyxyxy semapv:LexicalMatching a:example c:example rdf_matcher 0.83 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +c:something YXYXY owl:equivalentClass b:something yxyxyx semapv:ManualMappingCuration c:example b:example rdf_matcher 0.845 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +b:something XXXXX owl:equivalentClass a:something xxxxxx semapv:LexicalMatching b:example a:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +c:something YYYYY owl:equivalentClass d:something yyyyyy semapv:LexicalMatching c:example d:example rdf_matcher 0.81 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +c:something YXYXY owl:equivalentClass Not b:something yxyxyx semapv:ManualMappingCuration c:example b:example rdf_matcher 0.845 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock 
data +d:something XYXYX owl:equivalentClass b:something xyxyxy semapv:LexicalMatching d:example b:example rdf_matcher 0.83 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +a:something XXXXX owl:subClassOf b:something xxxxxx semapv:LexicalMatching a:example b:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +d:something YYYYY owl:equivalentClass a:something yyyyyy semapv:ManualMappingCuration d:example a:example rdf_matcher 0.82 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data +a:something XXXXX owl:subClassOf Not b:something xxxxxx semapv:LexicalMatching a:example b:example rdf_matcher 0.8 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity xxxxx mock data diff --git a/tests/test_merge.py b/tests/test_merge.py index e7a3f42b..f1dd1c5c 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -21,8 +21,8 @@ def setUp(self) -> None: def test_merge_multiple_inputs(self): """Test merging of multiple msdfs.""" merged_msdf = merge_msdf(*self.msdfs) - self.assertEqual(123, len(merged_msdf.df)) + self.assertEqual(275, len(merged_msdf.df)) def test_merge_single_input(self): """Test merging when a single msdf is provided.""" - self.assertEqual(93, len(merge_msdf(self.msdf).df)) + self.assertEqual(141, len(merge_msdf(self.msdf).df)) diff --git a/tests/test_reconcile.py b/tests/test_reconcile.py index 65fa2275..a9067121 100644 --- a/tests/test_reconcile.py +++ b/tests/test_reconcile.py @@ -13,32 +13,42 @@ class TestReconcile(unittest.TestCase): def setUp(self) -> None: """Test up the test case with the third basic example.""" - self.msdf = parse_sssom_table(data_dir / "basic3.tsv") + self.msdf1 = parse_sssom_table(data_dir / "basic3.tsv") + self.msdf2 = parse_sssom_table(data_dir / "basic7.tsv") def test_filter(self): """Test filtering returns the right number of rows.""" - df = filter_redundant_rows(self.msdf.df) 
- self.assertEqual(10, len(df.index)) + df1 = filter_redundant_rows(self.msdf1.df) + self.assertEqual(10, len(df1.index)) + df2 = filter_redundant_rows(self.msdf2.df) + self.assertEqual(18, len(df2.index)) def test_deal_with_negation(self): """Test handling negating returns the right number of rows.""" - df = deal_with_negation(self.msdf.df) - self.assertEqual(7, len(df.index)) + df1 = deal_with_negation(self.msdf1.df) + self.assertEqual(7, len(df1.index)) + df2 = deal_with_negation(self.msdf2.df) + self.assertEqual(5, len(df2.index)) def test_merge(self): """Test merging two tables.""" - msdf1 = parse_sssom_table(data_dir / "basic.tsv") - msdf2 = parse_sssom_table(data_dir / "basic2.tsv") - merged_msdf = merge_msdf(msdf1, msdf2) + msdf3 = parse_sssom_table(data_dir / "basic.tsv") + merged_msdf1 = merge_msdf(self.msdf1, msdf3) - self.assertEqual(122, len(merged_msdf.df)) + self.assertEqual(152, len(merged_msdf1.df)) - def test_merge_no_reconcile(self): + merged_msdf2 = merge_msdf(self.msdf2, msdf3) + self.assertEqual(174, len(merged_msdf2.df)) + + merged_msdf3 = merge_msdf(self.msdf1, self.msdf2) + self.assertEqual(34, len(merged_msdf3.df)) + + def test_merge_with_reconcile(self): """Test merging two tables without reconciliation.""" msdf1 = parse_sssom_table(data_dir / "basic4.tsv") msdf2 = parse_sssom_table(data_dir / "basic5.tsv") - merged_msdf = merge_msdf(msdf1, msdf2, reconcile=False) + merged_msdf = merge_msdf(msdf1, msdf2, reconcile=True) self.assertEqual(53, len(msdf1.df)) self.assertEqual(53, len(msdf2.df))