# SimilarityScorev2.py
import re

import numpy as np
import spacy
# Load the spaCy pipeline once at import time; sim_score() uses this shared
# `nlp` object to turn each text into a Doc before comparing them.
# NOTE(review): "en_ner_bionlp13cg_md" looks like a separately installed
# (scispaCy-style) biomedical model — confirm it is available in the environment.
nlp = spacy.load("en_ner_bionlp13cg_md")
# Negation cues recognised by check_negation(). The multi-word cues are kept
# for readability even though the single word "not" already covers them.
# "cannot" is listed explicitly because whole-word matching no longer finds
# the "not" embedded inside it.
_NEGATION_TERMS = (
    'not', 'never', 'no', 'neither', 'none', 'nobody', 'nowhere', 'nothing',
    'without', "don't", "can't", "won't", "isn't", "aren't", "wasn't",
    "weren't", "haven't", "hasn't", "hadn't", "shouldn't", "wouldn't",
    "couldn't", "doesn't", "didn't", "may not", "might not", "need not",
    "mustn't", "shall not", "will not", "ought not to", "nevermore", "rarely",
    "scarcely", "cannot",
)

# Single case-insensitive pattern matching any cue as a whole word/phrase.
# The lookarounds reject matches embedded in a longer word, so "no" does not
# fire on "normal" or "diagnosis". Longer cues are listed first so a phrase is
# preferred over its shorter prefix.
_NEGATION_PATTERN = re.compile(
    r"(?<!\w)(?:"
    + "|".join(
        re.escape(term)
        for term in sorted(_NEGATION_TERMS, key=len, reverse=True)
    )
    + r")(?!\w)",
    re.IGNORECASE,
)


def check_negation(entity):
    """Return True if *entity* contains a negation cue as a whole word.

    Args:
        entity: Text to inspect. A non-string iterable of tokens is also
            accepted; its items are joined with spaces before matching.

    Returns:
        True when any cue from ``_NEGATION_TERMS`` occurs in the text,
        ignoring case and not counting occurrences inside longer words;
        False otherwise (including for empty input).
    """
    if not isinstance(entity, str):
        # Keep working for token sequences, which the previous membership
        # test (`neg in entity`) also accepted.
        entity = " ".join(str(token) for token in entity)
    # Treat the typographic apostrophe like the ASCII one so contractions
    # such as "don’t" are still recognised.
    text = entity.replace("\u2019", "'")
    return _NEGATION_PATTERN.search(text) is not None
def sim_score(ref, gen, *, weight=0.5):
    """Return the spaCy similarity of two texts, scaled on a negation mismatch.

    Args:
        ref: Reference text.
        gen: Generated text compared against ``ref``.
        weight: Factor applied to the similarity when exactly one of the two
            texts contains a negation cue. Defaults to 0.5, the value that
            was previously hard-coded.

    Returns:
        ``nlp(ref).similarity(nlp(gen))``, multiplied by ``weight`` when
        ``check_negation`` is true for exactly one of ``ref`` and ``gen``.
    """
    similarity = nlp(ref).similarity(nlp(gen))
    # Exactly one of the texts has a negation cue: scale the score by `weight`.
    # Negation in both texts, or in neither, leaves the score untouched.
    if check_negation(ref) != check_negation(gen):
        return similarity * weight
    return similarity
def calculate_ratio(table):
    """Return ``max / (max + mean)`` for every column of *table*.

    Args:
        table: 2-D array-like of scores (a NumPy array or nested lists).

    Returns:
        A list with one entry per column. An entry is 0 when the column's
        ``max + mean`` equals zero, which avoids a division by zero. An
        input without columns yields an empty list.

    Raises:
        ValueError: Propagated from ``np.max`` if a column has no elements.
    """
    # Accept nested lists as well as arrays; arrays pass through unchanged.
    table = np.asarray(table)
    ratios = []
    for column in table.T:  # rows of the transpose are the table's columns
        max_value = np.max(column)
        denominator = max_value + np.mean(column)
        ratios.append(max_value / denominator if denominator != 0 else 0)
    return ratios
def similarity_score(ref_term, gen_term):
    """Score every generated term against all reference terms.

    A table of ``sim_score(ref, gen)`` values is built with one row per
    entry of ``ref_term`` and one column per entry of ``gen_term``; the
    table is then reduced column-wise by ``calculate_ratio``.

    Args:
        ref_term: Iterable of reference terms.
        gen_term: Iterable of generated terms.

    Returns:
        The list produced by ``calculate_ratio`` for the score table.
    """
    score_rows = []
    for ref in ref_term:
        # One row: this reference term compared with each generated term.
        score_rows.append([sim_score(ref, gen) for gen in gen_term])
    score_table = np.array(score_rows)
    return calculate_ratio(score_table)