End python-2.7 Support (cidgoh#33)

* Pass partialMatchedResourceListSet as a list to retainedPhrase()
* Updated tests for retainedPhrase()
* Updated small_simple expected test output
* Clean up test output files
* Sort outputs before printing, GComponent -> Component, add newline at end
* Show more detail when test output doesn't match expected output
* Print ontofetch/ontohelper messages to stderr. Update test output
* Use print function in py2.7
* Remove unused pretty printer
* Stop testing py27, start testing py37
* Drop py37 testing
lexmapr · Oct 2, 2018 · ee8582a
1 parent 3f39921
commit ee8582a
Show file tree

Hide file tree

Showing 25 changed files with 172 additions and 178 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,6 +1,5 @@
 language: python
 python:
-  - "2.7"
   - "3.6"
 os:
   - linux
@@ -12,9 +11,6 @@ git:
 # TODO: remove these two settings after refactoring pipeline
 env:
   - PYTHONHASHSEED=0
-matrix:
-  allow_failures:
-    - python: "2.7"
 
 install:
   - sudo apt-get update -qq

diff --git a/lexmapr/ontofetch.py b/lexmapr/ontofetch.py
@@ -29,6 +29,7 @@
 	**************************************************************************
 """ 
 
+from __future__ import print_function
 import json
 import sys
 import os
@@ -183,7 +184,7 @@ def __main__(self):
 		(main_ontology_file, output_file_basename) = self.onto_helper.check_ont_file(args[0], options)
 
 		# Load main ontology file into RDF graph
-		print("Fetching and parsing " + main_ontology_file + " ...")
+		print("Fetching and parsing " + main_ontology_file + " ...", file=sys.stderr)
 
 		try:
 			# ISSUE: ontology file taken in as ascii; rdflib doesn't accept
@@ -200,17 +201,17 @@ def __main__(self):
 
 		# Load self.struct with ontology metadata
 		self.onto_helper.set_ontology_metadata(self.onto_helper.queries['ontology_metadata'])
-		print("Metadata:", json.dumps(self.onto_helper.struct['metadata'],  sort_keys=False, indent=4, separators=(',', ': ')))
+		print("Metadata:", json.dumps(self.onto_helper.struct['metadata'],  sort_keys=False, indent=4, separators=(',', ': ')), file=sys.stderr)
 
 		# Retrieve all subclasses of 'owl:Thing' in given ontology
 		# and place in self.onto_helper.struct.specifications
 		# To retrieve just a given term like BFO:entity
 		# specBinding = {'root': rdflib.URIRef(self.get_expanded_id('BFO:0000001'))}  
-		print('Doing term hierarchy query')
+		print('Doing term hierarchy query', file=sys.stderr)
 		specBinding = {'root': rdflib.URIRef(self.onto_helper.get_expanded_id('owl:Thing'))} 
 		entities = self.onto_helper.do_query_table(self.queries['tree'], specBinding )
 
-		print('Doing terms', len(entities))
+		print('Doing terms', len(entities), file=sys.stderr)
 		self.do_entities(entities)
 
 		self.onto_helper.do_output_json(self.onto_helper.struct, output_file_basename)

diff --git a/lexmapr/ontohelper.py b/lexmapr/ontohelper.py
@@ -5,6 +5,7 @@
  
 """
 
+from __future__ import print_function
 import os
 import json
 import sys
@@ -276,7 +277,7 @@ def do_ontology_includes(self, main_ontology_file):
 			ORDER BY (?import_file)
 		""")		
 
-		print("It has %s import files ..." % len(imports))
+		print("It has %s import files ..." % len(imports), file=sys.stderr)
 
 		for result_row in imports:
 

diff --git a/lexmapr/pipeline.py b/lexmapr/pipeline.py
@@ -166,12 +166,8 @@ def retainedPhrase(termList):
     wordList = []
     retainedSet = []
     returnedSet = []
-    termList = termList.replace("{", "")
-    termList = termList.replace("}", "")
-    #termList = termList.replace("'", "")
-    lst = termList.split("',")
-    # print("ddddddddddddddddeeeee   " + str(lst))
-    for x in lst:
+    for x in termList:
+        x.replace("'", "")
         lst2 = x.split(":")
         a = lst2[0]
         a = a.replace("=", ",")
@@ -966,11 +962,11 @@ def find_full_term_match(sample, lookup_table, cleaned_sample, status_addendum):
     ret.update({
         "matched_term": matched_term,
         "all_match_terms_with_resource_ids":
-            str(list(retained_tokens)),
+            str(sorted(list(retained_tokens))),
         "retained_terms_with_resource_ids":
-            str(list(retained_tokens)),
+            str(sorted(list(retained_tokens))),
         "match_status_macro_level": "Full Term Match",
-        "match_status_micro_level": str(list(final_status)),
+        "match_status_micro_level": str(sorted(list(final_status))),
     })
     # Return
     return ret
@@ -1355,7 +1351,7 @@ def run(args):
 
             # Write to all headers
             if args.format == "full":
-                fw.write("\t" + full_term_match["matched_term"] + "\t"
+                fw.write("\t" + str([full_term_match["matched_term"]]) + "\t"
                     + full_term_match["all_match_terms_with_resource_ids"]
                     + "\t"
                     + full_term_match["retained_terms_with_resource_ids"]
@@ -1391,7 +1387,7 @@ def run(args):
                                                status_addendum)
 
             partial_matches = set(component_and_token_matches["component_matches"])  # Makes a set of all matched components from the above processing
-            status = "GComponent Match"             #Note: GComponent instead of is used as tag to help sorting later in result file
+            status = "Component Match"
 
             # Iterate over token_matches in component_and_token_matches
             for token in component_and_token_matches["token_matches"]:
@@ -1468,7 +1464,7 @@ def run(args):
 
             # If size of set is more than one member, looks for the retained matched terms by defined criteria
             if (len(partialMatchedResourceListSet) > 0):
-                retainedSet = retainedPhrase(str(partialMatchedResourceListSet))
+                retainedSet = retainedPhrase(list(partialMatchedResourceListSet))
                 logger.debug("retainedSet " + str(retainedSet))
                 # HERE SHOULD HAVE ANOTHER RETAING SET
 
@@ -1477,7 +1473,7 @@ def run(args):
             # In case it is for componet matching and we have at least one component matched
             if (len(partial_matches) > 0):
                 if args.format == 'full':
-                    fw.write('\t' + str(list(partial_matches)) + '\t' + str(list(partialMatchedResourceListSet)) + '\t' + str(list(retainedSet)) + '\t' + str(len(retainedSet)) + '\t' + status + '\t' + str(list(final_status)) + '\t' + str(list(remSetDiff)))
+                    fw.write('\t' + str(sorted(list(partial_matches))) + '\t' + str(sorted(list(partialMatchedResourceListSet))) + '\t' + str(sorted(list(retainedSet))) + '\t' + str(len(retainedSet)) + '\t' + status + '\t' + str(sorted(list(final_status))) + '\t' + str(sorted(list(remSetDiff))))
 
                 compctr = 0
                 if args.format == 'full':
@@ -1488,7 +1484,7 @@ def run(args):
                         fw.write("\t" + str(memb))
 
                 if args.format == 'full':
-                    for comp in retainedSet:
+                    for comp in sorted(list(retainedSet)):
                         compctr += 1
                         if (compctr == 1):
                             fw.write("Component" + str(compctr) + "-> " + str(comp))
@@ -1497,9 +1493,9 @@ def run(args):
                     trigger = True
                 else:        # In case of no matching case
                     if args.format == 'full':
-                        fw.write('\t' + str(list(partial_matches)) + '\t' + str(list(partial_matches_with_ids)) + '\t\t' + "\t" + "Sorry No Match" + "\t" + str(list(remaining_tokens)))
-
+                        fw.write('\t' + str(sorted(list(partial_matches))) + '\t' + str(sorted(list(partial_matches_with_ids))) + '\t\t' + "\t" + "Sorry No Match" + "\t" + str(sorted(list(remaining_tokens))))
 
+    fw.write('\n')
     #Output files closed
     if fw is not sys.stdout:
         fw.close()
diff --git a/lexmapr/tests/output/empty.tsv b/lexmapr/tests/output/empty.tsv
@@ -1 +1 @@
-Sample_Id	Sample_Desc	Cleaned_Sample	Phrase_POS_Tagged	Probable_Candidate_Terms	Matched_Term	All_matched_Terms_with_Resource_IDs	Retained_Terms_with_Resource_IDs	Number of Components(In case of Component Match)	Match_Status(Macro Level)	Match_Status(Micro Level)	Remaining_Tokens	Different Components(In case of Component Match)
+Sample_Id	Sample_Desc	Cleaned_Sample	Phrase_POS_Tagged	Probable_Candidate_Terms	Matched_Term	All_matched_Terms_with_Resource_IDs	Retained_Terms_with_Resource_IDs	Number of Components(In case of Component Match)	Match_Status(Macro Level)	Match_Status(Micro Level)	Remaining_Tokens	Different Components(In case of Component Match)
diff --git a/lexmapr/tests/output/empty_not_full.tsv b/lexmapr/tests/output/empty_not_full.tsv
@@ -1 +1 @@
-Sample_Id	Sample_Desc	Cleaned_Sample
+Sample_Id	Sample_Desc	Cleaned_Sample
diff --git a/lexmapr/tests/output/small_simple.tsv b/lexmapr/tests/output/small_simple.tsv
@@ -1,6 +1,6 @@
 Sample_Id	Sample_Desc	Cleaned_Sample	Phrase_POS_Tagged	Probable_Candidate_Terms	Matched_Term	All_matched_Terms_with_Resource_IDs	Retained_Terms_with_Resource_IDs	Number of Components(In case of Component Match)	Match_Status(Macro Level)	Match_Status(Micro Level)	Remaining_Tokens	Different Components(In case of Component Match)
-small_simple1	Chicken Breast	chicken breast	[('chicken', 'NN'), ('breast', 'NN')]	chicken breast	chicken breast	['chicken breast:CandidateTerm_FoodOn_246']	['chicken breast:CandidateTerm_FoodOn_246']		Full Term Match	['Change of Case in Input Data']
-small_simple2	Baked Potato	baked potato	[('baked', 'VBN'), ('potato', 'NN')]	potato	baked potato	['potato (baked):FOODON_03302196']	['potato (baked):FOODON_03302196']		Full Term Match	['Permutation of Tokens in Bracketed Resource Term']
-small_simple3	Canned Corn	canned corn	[('canned', 'VBN'), ('corn', 'NN')]	corn	canned corn	['corn (canned):FOODON_03302665']	['corn (canned):FOODON_03302665']		Full Term Match	['Permutation of Tokens in Bracketed Resource Term']
-small_simple4	Frozen Yogurt	frozen yogurt	[('frozen', 'JJ'), ('yogurt', 'NN')]	yogurt	frozen yogurt	['frozen yogurt:FOODON_03307445']	['frozen yogurt:FOODON_03307445']		Full Term Match	['Change of Case in Input Data']
-small_simple5	Apple Pie	apple pie	[('apple', 'NN'), ('pie', 'NN')]	apple pie	['pie', 'apple food product']	['pie:CandidateTerm_FoodOn_839', 'apple food product:FOODON_00001611']	["apple food product:FOODON_00001611'", 'pie:CandidateTerm_FoodOn_839']	2	GComponent Match	['Suffix Addition- food product to the Input']	[]	Component1-> apple food product:FOODON_00001611', Component2-> pie:CandidateTerm_FoodOn_839
+small_simple1	Chicken Breast	chicken breast	[('chicken', 'NN'), ('breast', 'NN')]	chicken breast	['chicken breast']	['chicken breast:CandidateTerm_FoodOn_246']	['chicken breast:CandidateTerm_FoodOn_246']		Full Term Match	['Change of Case in Input Data']
+small_simple2	Baked Potato	baked potato	[('baked', 'VBN'), ('potato', 'NN')]	potato	['baked potato']	['potato (baked):FOODON_03302196']	['potato (baked):FOODON_03302196']		Full Term Match	['Permutation of Tokens in Bracketed Resource Term']
+small_simple3	Canned Corn	canned corn	[('canned', 'VBN'), ('corn', 'NN')]	corn	['canned corn']	['corn (canned):FOODON_03302665']	['corn (canned):FOODON_03302665']		Full Term Match	['Permutation of Tokens in Bracketed Resource Term']
+small_simple4	Frozen Yogurt	frozen yogurt	[('frozen', 'JJ'), ('yogurt', 'NN')]	yogurt	['frozen yogurt']	['frozen yogurt:FOODON_03307445']	['frozen yogurt:FOODON_03307445']		Full Term Match	['Change of Case in Input Data']
+small_simple5	Apple Pie	apple pie	[('apple', 'NN'), ('pie', 'NN')]	apple pie	['apple food product', 'pie']	['apple food product:FOODON_00001611', 'pie:CandidateTerm_FoodOn_839']	['apple food product:FOODON_00001611', 'pie:CandidateTerm_FoodOn_839']	2	Component Match	['Suffix Addition- food product to the Input']	[]	Component1-> apple food product:FOODON_00001611, Component2-> pie:CandidateTerm_FoodOn_839
diff --git a/lexmapr/tests/output/small_simple_not_full.tsv b/lexmapr/tests/output/small_simple_not_full.tsv
@@ -3,4 +3,4 @@ small_simple1	Chicken Breast	chicken breast	chicken breast	['chicken breast:Cand
 small_simple2	Baked Potato	baked potato	baked potato	['potato (baked):FOODON_03302196']
 small_simple3	Canned Corn	canned corn	canned corn	['corn (canned):FOODON_03302665']
 small_simple4	Frozen Yogurt	frozen yogurt	frozen yogurt	['frozen yogurt:FOODON_03307445']
-small_simple5	Apple Pie	apple pie	apple food product:FOODON_00001611'	pie:CandidateTerm_FoodOn_839
+small_simple5	Apple Pie	apple pie	pie:CandidateTerm_FoodOn_839	apple food product:FOODON_00001611
diff --git a/lexmapr/tests/output/test_abbreviations.tsv b/lexmapr/tests/output/test_abbreviations.tsv
@@ -1,10 +1,10 @@
 Sample_Id	Sample_Desc	Cleaned_Sample	Phrase_POS_Tagged	Probable_Candidate_Terms	Matched_Term	All_matched_Terms_with_Resource_IDs	Retained_Terms_with_Resource_IDs	Number of Components(In case of Component Match)	Match_Status(Macro Level)	Match_Status(Micro Level)	Remaining_Tokens	Different Components(In case of Component Match)
-small_simple1	Breast	breast	[('breast', 'NN')]	breast	breast	['breast:CandidateTerm_FoodOn_188']	['breast:CandidateTerm_FoodOn_188']		Full Term Match	['Change of Case in Input Data']
-small_simple2	Baked Potato	baked potato	[('baked', 'VBN'), ('potato', 'NN')]	potato	baked potato	['potato (baked):FOODON_03302196']	['potato (baked):FOODON_03302196']		Full Term Match	['Permutation of Tokens in Bracketed Resource Term']
-small_simple3	fld	fluid	[('fluid', 'NN')]	fluid	fluid	['fluid:CandidateTerm_OtherOntology_76']	['fluid:CandidateTerm_OtherOntology_76']		Full Term Match	['Abbreviation-Acronym Treatment', 'A Direct Match with Cleaned Sample']
-small_simple4	froz fld	frozen fluid	[('frozen', 'JJ'), ('fluid', 'NN')]	fluid	['fluid', 'preservation by freezing']	['preservation by freezing:FOODON_03470136', 'fluid:CandidateTerm_OtherOntology_76']	["fluid:CandidateTerm_OtherOntology_76'", 'preservation by freezing:FOODON_03470136']	2	GComponent Match	['Synonym Usage', 'Abbreviation-Acronym Treatment']	['froz']	Component1-> fluid:CandidateTerm_OtherOntology_76', Component2-> preservation by freezing:FOODON_03470136
-small_simple5	fld apple	fluid apple	[('fluid', 'NN'), ('apple', 'NN')]	fluid apple	['fluid', 'apple food product']	['apple food product:FOODON_00001611', 'fluid:CandidateTerm_OtherOntology_76']	["fluid:CandidateTerm_OtherOntology_76'", 'apple food product:FOODON_00001611']	2	GComponent Match	['Abbreviation-Acronym Treatment', 'Suffix Addition- food product to the Input']	[]	Component1-> fluid:CandidateTerm_OtherOntology_76', Component2-> apple food product:FOODON_00001611
-small_simple6	csf	cerebrospinal fluid	[('cerebrospinal', 'JJ'), ('fluid', 'NN')]	fluid	cerebrospinal fluid	['cerebrospinal fluid:ENVO_02000029']	['cerebrospinal fluid:ENVO_02000029']		Full Term Match	['Change Case and Abbreviation-Acronym Treatment', 'A Direct Match with Cleaned Sample']
-small_simple7	csf f	cerebrospinal fluid fahrenheit	[('cerebrospinal', 'JJ'), ('fluid', 'NN'), ('fahrenheit', 'NN')]	fluid fahrenheit	['cerebrospinal fluid', 'fluid', 'cerebrospinal']	['cerebrospinal:Quality-BodyRelated', 'cerebrospinal fluid:ENVO_02000029', 'fluid:CandidateTerm_OtherOntology_76']	['cerebrospinal fluid:ENVO_02000029']	1	GComponent Match	['Using Semantic Tagging Resources', 'Change Case and Abbreviation-Acronym Treatment']	['f']	Component1-> cerebrospinal fluid:ENVO_02000029
-small_simple8	Pie csf	pie cerebrospinal fluid	[('pie', 'JJ'), ('cerebrospinal', 'JJ'), ('fluid', 'NN')]	fluid	['cerebrospinal fluid', 'fluid', 'pie', 'cerebrospinal']	['cerebrospinal:Quality-BodyRelated', 'cerebrospinal fluid:ENVO_02000029', 'pie:CandidateTerm_FoodOn_839', 'fluid:CandidateTerm_OtherOntology_76']	['cerebrospinal fluid:ENVO_02000029', 'pie:CandidateTerm_FoodOn_839']	2	GComponent Match	['Using Semantic Tagging Resources', 'Change Case and Abbreviation-Acronym Treatment']	[]	Component1-> cerebrospinal fluid:ENVO_02000029, Component2-> pie:CandidateTerm_FoodOn_839
-small_simple9	csf fld	cerebrospinal fluid fluid	[('cerebrospinal', 'JJ'), ('fluid', 'NN'), ('fluid', 'NN')]	fluid fluid	['cerebrospinal fluid', 'fluid', 'cerebrospinal']	['cerebrospinal:Quality-BodyRelated', 'cerebrospinal fluid:ENVO_02000029', 'fluid:CandidateTerm_OtherOntology_76']	['cerebrospinal fluid:ENVO_02000029']	1	GComponent Match	['Abbreviation-Acronym Treatment', 'Using Semantic Tagging Resources', 'Change Case and Abbreviation-Acronym Treatment']	[]	Component1-> cerebrospinal fluid:ENVO_02000029
+small_simple1	Breast	breast	[('breast', 'NN')]	breast	['breast']	['breast:CandidateTerm_FoodOn_188']	['breast:CandidateTerm_FoodOn_188']		Full Term Match	['Change of Case in Input Data']
+small_simple2	Baked Potato	baked potato	[('baked', 'VBN'), ('potato', 'NN')]	potato	['baked potato']	['potato (baked):FOODON_03302196']	['potato (baked):FOODON_03302196']		Full Term Match	['Permutation of Tokens in Bracketed Resource Term']
+small_simple3	fld	fluid	[('fluid', 'NN')]	fluid	['fluid']	['fluid:CandidateTerm_OtherOntology_76']	['fluid:CandidateTerm_OtherOntology_76']		Full Term Match	['A Direct Match with Cleaned Sample', 'Abbreviation-Acronym Treatment']
+small_simple4	froz fld	frozen fluid	[('frozen', 'JJ'), ('fluid', 'NN')]	fluid	['fluid', 'preservation by freezing']	['fluid:CandidateTerm_OtherOntology_76', 'preservation by freezing:FOODON_03470136']	['fluid:CandidateTerm_OtherOntology_76', 'preservation by freezing:FOODON_03470136']	2	Component Match	['Abbreviation-Acronym Treatment', 'Synonym Usage']	['froz']	Component1-> fluid:CandidateTerm_OtherOntology_76, Component2-> preservation by freezing:FOODON_03470136
+small_simple5	fld apple	fluid apple	[('fluid', 'NN'), ('apple', 'NN')]	fluid apple	['apple food product', 'fluid']	['apple food product:FOODON_00001611', 'fluid:CandidateTerm_OtherOntology_76']	['apple food product:FOODON_00001611', 'fluid:CandidateTerm_OtherOntology_76']	2	Component Match	['Abbreviation-Acronym Treatment', 'Suffix Addition- food product to the Input']	[]	Component1-> apple food product:FOODON_00001611, Component2-> fluid:CandidateTerm_OtherOntology_76
+small_simple6	csf	cerebrospinal fluid	[('cerebrospinal', 'JJ'), ('fluid', 'NN')]	fluid	['cerebrospinal fluid']	['cerebrospinal fluid:ENVO_02000029']	['cerebrospinal fluid:ENVO_02000029']		Full Term Match	['A Direct Match with Cleaned Sample', 'Change Case and Abbreviation-Acronym Treatment']
+small_simple7	csf f	cerebrospinal fluid fahrenheit	[('cerebrospinal', 'JJ'), ('fluid', 'NN'), ('fahrenheit', 'NN')]	fluid fahrenheit	['cerebrospinal', 'cerebrospinal fluid', 'fluid']	['cerebrospinal fluid:ENVO_02000029', 'cerebrospinal:Quality-BodyRelated', 'fluid:CandidateTerm_OtherOntology_76']	['cerebrospinal fluid:ENVO_02000029']	1	Component Match	['Change Case and Abbreviation-Acronym Treatment', 'Using Semantic Tagging Resources']	['f']	Component1-> cerebrospinal fluid:ENVO_02000029
+small_simple8	Pie csf	pie cerebrospinal fluid	[('pie', 'JJ'), ('cerebrospinal', 'JJ'), ('fluid', 'NN')]	fluid	['cerebrospinal', 'cerebrospinal fluid', 'fluid', 'pie']	['cerebrospinal fluid:ENVO_02000029', 'cerebrospinal:Quality-BodyRelated', 'fluid:CandidateTerm_OtherOntology_76', 'pie:CandidateTerm_FoodOn_839']	['cerebrospinal fluid:ENVO_02000029', 'pie:CandidateTerm_FoodOn_839']	2	Component Match	['Change Case and Abbreviation-Acronym Treatment', 'Using Semantic Tagging Resources']	[]	Component1-> cerebrospinal fluid:ENVO_02000029, Component2-> pie:CandidateTerm_FoodOn_839
+small_simple9	csf fld	cerebrospinal fluid fluid	[('cerebrospinal', 'JJ'), ('fluid', 'NN'), ('fluid', 'NN')]	fluid fluid	['cerebrospinal', 'cerebrospinal fluid', 'fluid']	['cerebrospinal fluid:ENVO_02000029', 'cerebrospinal:Quality-BodyRelated', 'fluid:CandidateTerm_OtherOntology_76']	['cerebrospinal fluid:ENVO_02000029']	1	Component Match	['Abbreviation-Acronym Treatment', 'Change Case and Abbreviation-Acronym Treatment', 'Using Semantic Tagging Resources']	[]	Component1-> cerebrospinal fluid:ENVO_02000029