mapping-commons · hrshdhgd · Mar 16, 2023 · Mar 13, 2023 · Mar 14, 2023 · Mar 14, 2023
diff --git a/sssom/cli.py b/sssom/cli.py
@@ -499,7 +499,7 @@ def correlations(input: str, output: TextIO, transpose: bool, fields: Tuple):
 @click.option(
     "-R",
     "--reconcile",
-    default=True,
+    default=False,
     help="Boolean indicating the need for reconciliation of the SSSOM tsv file.",
 )
 @output_option

diff --git a/sssom/util.py b/sssom/util.py
@@ -277,6 +277,7 @@ def filter_redundant_rows(
     # create a 'sort' method and then replce the following line by sort()
     df = sort_sssom(df)
     # df[CONFIDENCE] = df[CONFIDENCE].apply(lambda x: x + random.random() / 10000)
+    confidence_in_original = CONFIDENCE in df.columns
     df, nan_df = assign_default_confidence(df)
     if ignore_predicate:
         key = [SUBJECT_ID, OBJECT_ID]
@@ -359,7 +360,7 @@ def filter_redundant_rows(
             [get_row_based_on_hierarchy(concerned_df), return_df], axis=0
         ).drop_duplicates()
 
-    if return_df[CONFIDENCE].isnull().all():
+    if not confidence_in_original:
         return_df = return_df.drop(columns=[CONFIDENCE], axis=1)
     return return_df
 
@@ -402,7 +403,7 @@ def assign_default_confidence(
     if df is not None:
         new_df = df.copy()
         if CONFIDENCE not in new_df.columns:
-            new_df[CONFIDENCE] = np.NaN
+            new_df[CONFIDENCE] = 0.0  # np.NaN
             nan_df = pd.DataFrame(columns=new_df.columns)
         else:
             new_df = df[~df[CONFIDENCE].isna()]
@@ -703,6 +704,7 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:
     """
 
     # Handle DataFrames with no 'confidence' column (basically adding a np.NaN to all non-numeric confidences)
+    confidence_in_original = CONFIDENCE in df.columns
     df, nan_df = assign_default_confidence(df)
     if df is None:
         raise ValueError(
@@ -823,6 +825,9 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:
     else:
         return_df = reconciled_df.append(nan_df).drop_duplicates()
 
+    if not confidence_in_original:
+        return_df = return_df.drop(columns=[CONFIDENCE], axis=1)
+
     return return_df
 
 

diff --git a/tests/data/reconcile_1.tsv b/tests/data/reconcile_1.tsv
@@ -0,0 +1,18 @@
+# curie_map:
+#   UBERON: http://purl.obolibrary.org/obo/UBERON_
+#   ZFS: http://purl.obolibrary.org/obo/ZFS_
+#   oio: http://www.geneontology.org/formats/oboInOwl#
+#   owl: http://www.w3.org/2002/07/owl#
+#   rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
+#   rdfs: http://www.w3.org/2000/01/rdf-schema#
+#   semapv: https://w3id.org/semapv/
+#   skos: http://www.w3.org/2004/02/skos/core#
+#   sssom: https://w3id.org/sssom/
+# license: https://w3id.org/sssom/license/unspecified
+# mapping_set_id: https://w3id.org/sssom/mappings/72debc9d-ca69-45e8-b46d-aef8361bedf2
+# object_source: ZFS
+# subject_source: UBERON
+subject_id	subject_label	predicate_id	object_id	mapping_justification	subject_source	object_source
+UBERON:0000069	larval stage	oio:hasDbXref	ZFS:0000048	semapv:UnspecifiedMatching	UBERON	ZFS
+UBERON:0000105	life cycle stage	oio:hasDbXref	ZFS:0100000	semapv:UnspecifiedMatching	UBERON	ZFS
+UBERON:0000105	life cycle stage	oio:hasDbXref	ZFS:0000000	semapv:UnspecifiedMatching	UBERON	ZFS
diff --git a/tests/data/reconcile_2.tsv b/tests/data/reconcile_2.tsv
@@ -0,0 +1,19 @@
+# curie_map:
+#   UBERON: http://purl.obolibrary.org/obo/UBERON_
+#   WBls: http://purl.obolibrary.org/obo/WBls_
+#   oio: http://www.geneontology.org/formats/oboInOwl#
+#   owl: http://www.w3.org/2002/07/owl#
+#   rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
+#   rdfs: http://www.w3.org/2000/01/rdf-schema#
+#   semapv: https://w3id.org/semapv/
+#   skos: http://www.w3.org/2004/02/skos/core#
+#   sssom: https://w3id.org/sssom/
+# license: https://w3id.org/sssom/license/unspecified
+# mapping_set_id: https://w3id.org/sssom/mappings/c5e357f5-86df-4aaa-a30e-8a23ad523ab2
+# object_source: WBls
+# subject_source: UBERON
+subject_id	subject_label	predicate_id	object_id	mapping_justification	subject_source	object_source
+UBERON:0000066	fully formed stage	oio:hasDbXref	WBls:0000041	semapv:UnspecifiedMatching	UBERON	WBls
+UBERON:0000068	embryo stage	oio:hasDbXref	WBls:0000003	semapv:UnspecifiedMatching	UBERON	WBls
+UBERON:0000068	embryo stage	oio:hasDbXref	WBls:0000102	semapv:UnspecifiedMatching	UBERON	WBls
+UBERON:0000068	embryo stage	oio:hasDbXref	WBls:0000092	semapv:UnspecifiedMatching	UBERON	WBls
diff --git a/tests/test_reconcile.py b/tests/test_reconcile.py
@@ -53,3 +53,14 @@ def test_merge_with_reconcile(self):
         self.assertEqual(53, len(msdf1.df))
         self.assertEqual(53, len(msdf2.df))
         self.assertEqual(len(merged_msdf.df), (len(msdf1.df) + len(msdf2.df)))
+
+    def test_merge_with_reconcile_without_confidence(self):
+        """Test merging with reconciliation on tables lacking a confidence column."""
+        msdf1 = parse_sssom_table(data_dir / "reconcile_1.tsv")
+        msdf2 = parse_sssom_table(data_dir / "reconcile_2.tsv")
+
+        merged_msdf = merge_msdf(msdf1, msdf2, reconcile=True)
+
+        self.assertEqual(3, len(msdf1.df))
+        self.assertEqual(4, len(msdf2.df))
+        self.assertEqual(len(merged_msdf.df), (len(msdf1.df) + len(msdf2.df)))