Skip to content

Commit

Permalink
Merge branch 'master' into development
Browse files Browse the repository at this point in the history
  • Loading branch information
dfornika committed Sep 11, 2018
2 parents 810f264 + 7da0181 commit 3f39921
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
2 changes: 1 addition & 1 deletion lexmapr/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.1.2'
__version__ = '0.1.2'
7 changes: 7 additions & 0 deletions lexmapr/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1286,6 +1286,7 @@ def run(args):
status_addendum.append("Non English Language Words Treatment")
elif (lemma.lower() in lookup_table["non_english_words_lower"].keys()):
lemma = lookup_table["non_english_words_lower"][lemma.lower()]

status_addendum.append("Change Case and Non English Language Words Treatment")


Expand All @@ -1294,6 +1295,7 @@ def run(args):
cleaned_sample = lemma.lower()
elif (
lemma.lower() not in lookup_table["stop_words"]): # if newphrase is not empty and lemma is not in the stop-word list (abridged according to domain)

cleaned_sample = cleaned_sample + " " + lemma.lower()

cleaned_sample = re.sub(' +', ' ', cleaned_sample) # Extra inner spaces removed from cleaned sample
Expand Down Expand Up @@ -1350,6 +1352,7 @@ def run(args):
try:
# Find full-term match for sample
full_term_match = find_full_term_match(sample, lookup_table, cleaned_sample, status_addendum)

# Write to all headers
if args.format == "full":
fw.write("\t" + full_term_match["matched_term"] + "\t"
Expand All @@ -1369,6 +1372,7 @@ def run(args):
[covered_tokens.append(token) for token in sample_tokens]
# Remove all tokens from remaining_tokens
[remaining_tokens.remove(token) for token in sample_tokens]

# Set trigger to True
trigger = True
# Full-term match not found
Expand Down Expand Up @@ -1398,6 +1402,7 @@ def run(args):
# Remove token from remaining_tokens
remaining_tokens.remove(token)


remSetConv = set(remaining_tokens)
coveredAllTokensSetConv=set(covered_tokens)
remSetDiff = remSetConv.difference(coveredAllTokensSetConv)
Expand Down Expand Up @@ -1473,6 +1478,7 @@ def run(args):
if (len(partial_matches) > 0):
if args.format == 'full':
fw.write('\t' + str(list(partial_matches)) + '\t' + str(list(partialMatchedResourceListSet)) + '\t' + str(list(retainedSet)) + '\t' + str(len(retainedSet)) + '\t' + status + '\t' + str(list(final_status)) + '\t' + str(list(remSetDiff)))

compctr = 0
if args.format == 'full':
fw.write("\t")
Expand All @@ -1493,6 +1499,7 @@ def run(args):
if args.format == 'full':
fw.write('\t' + str(list(partial_matches)) + '\t' + str(list(partial_matches_with_ids)) + '\t\t' + "\t" + "Sorry No Match" + "\t" + str(list(remaining_tokens)))


#Output files closed
if fw is not sys.stdout:
fw.close()

0 comments on commit 3f39921

Please sign in to comment.