# Testing the Code

In [1]:
import spacy
import numpy as np
import QAfeatures,dennyCode_modified

# Lookup from a question's answer type to the entity labels accepted for it.
# The two AMT_* types accept the same labels but keep separate list objects.
entMapping = dict(
    TIME=['DATE', 'CARDINAL', 'TIME'],
    LOCATION=['GPE', 'LOC'],
    PERSON=['PERSON', 'ORG'],
    AMT_COUNTABLE=['QUANTITY', 'MONEY', 'CARDINAL'],
    AMT_UNCOUNTABLE=['QUANTITY', 'MONEY', 'CARDINAL'],
)


# Read the passage that the answer sentences are drawn from.
Answer_File = 'messi.txt'
with open(Answer_File, 'r') as f:
    rawText = f.read()

# Swap every line break for a period (presumably so that line breaks act as
# sentence boundaries for the downstream sentence splitter).
rawText = '.'.join(rawText.split('\n'))

question = 'What disease was Messi diagnosed with?'

# Ask the retrieval helper for sentences similar to the question; the result
# is iterated by key below, and each key is reported as that sentence's score.
sentenceDict = dennyCode_modified.find_similar_sentences(rawText, question, 3)
print('\n'.join([
    '{} -- score {}'.format(sentenceDict[key].text.strip(), str(key))
    for key in sentenceDict
]))


# Analyse the question once; its verb parent is compared against every candidate.
QS = QAfeatures.QuestionSense(question)
Q_verbParent = QAfeatures.verbParent(QS.questionChain)


def _head_texts(parse_nodes):
    """Collect the head-token text of every parse node in ``parse_nodes``.

    Each node must expose ``.token``; when that is a spaCy ``Span`` its
    ``.root`` token is used, otherwise the token itself. Returns a set of str.
    """
    texts = set()
    for parse_node in parse_nodes:
        tok = parse_node.token
        if isinstance(tok, spacy.tokens.Span):
            tok = tok.root
        texts.add(tok.text)
    return texts


# Question-side inputs for v3 and v4 do not depend on the candidate, so they
# are computed once instead of once per candidate.
if QS.questionNode.children:
    question_child_texts = _head_texts(QS.questionNode.children)
else:
    question_child_texts = set()
question_joined = '~|'.join(t.text for t in QS.doc)

print('\n')
for i, score in enumerate(sentenceDict):
    sentence = sentenceDict[score]

    # Candidate answers: named entities when the question has an answer type,
    # otherwise noun-chunk heads; with no descriptors either, every token that
    # is not already covered by a noun chunk is added as well.
    if QS.ansType:
        candidates = [ent.root for ent in sentence.ents]
    else:
        # noun_chunks is materialised once; it is consulted per token below.
        noun_chunks = list(sentence.noun_chunks)
        candidates = [chunk.root for chunk in noun_chunks]
        if not QS.descriptors:
            for token in sentence:
                if token not in candidates and not any(token in chunk for chunk in noun_chunks):
                    candidates.append(token)

    AS = QAfeatures.AnswerSense(sentence, candidates)
    vectors = {}

    for candidate in AS.nodeDic:

        # Fill out the feature vector, [v1 v2 v3 v4 v5 v6 v7]
        # v1: similarity between descriptor and candidate (default 0)
        # v2: similarity between candidate's verb parent and question's verb parent
        # v3: fraction of downwards dependents of question particle that are shared
        #     by the candidate (default 1)
        # v4: whether the candidate answer is contained within the question
        # v5: the length of the candidate answer's chain to its root
        # v6: if the question has an answer type, whether the candidate named-entity
        #     is of the type needed (default 0)
        # v7: the length of the candidate answer
        node, chain = AS.nodeDic[candidate]

        if QS.descriptors:
            v1 = candidate.similarity(QS.descriptors)
        else:
            v1 = 0

        A_verbParent = QAfeatures.verbParent(chain)
        if not Q_verbParent or not A_verbParent:
            v2 = 0
        else:
            v2 = A_verbParent.similarity(Q_verbParent)

        if not question_child_texts:
            v3 = 1
        else:
            # Children come from the candidate's parse node, mirroring
            # QS.questionNode on the question side: the entries must expose
            # `.token`, which the candidate span itself does not provide.
            # NOTE(review): confirm `node.children` against QAfeatures.
            candidate_child_texts = _head_texts(node.children)
            # set.intersection (`&`); sets have no `intersect` method.
            v3 = len(question_child_texts & candidate_child_texts) / len(question_child_texts)

        # NOTE(review): this is a plain substring test on the delimiter-joined
        # texts, so a candidate can also match inside a longer question token.
        v4 = int('~|'.join(c.text for c in candidate) in question_joined)

        v5 = len(chain)

        v6 = 0
        if QS.ansType and candidate in AS.doc.ents and candidate.label_ in entMapping[QS.ansType]:
            v6 = 1

        v7 = len(candidate)

        vectors[candidate] = np.array([v1, v2, v3, v4, v5, v6, v7])
    print('Vectors for sentence {}:'.format(i+1), vectors)

114
After completing his growth hormone treatment aged 14, Messi became an integral part of the "Baby Dream Team", Barcelona's greatest-ever youth side. -- score 0.5974470041681134
However, his future as a professional player was threatened when, at age 10, he was diagnosed with a growth hormone deficiency. -- score 0.61800181790615
Messi making his Maradona-esque run against Getafe in 2007.Already frequently compared to compatriot Diego Maradona, Messi proved their similarity when he nearly replicated Maradona's two most famous goals in the span of seven weeks. -- score 0.6374081137240359


Vectors for sentence 1: {his growth hormone treatment: array([0.60092825, 0.17240225, 1.        , 0.        , 3.        ,
       0.        , 4.        ]), Messi: array([-0.03365252,  0.28499752,  1.        ,  1.        ,  1.        ,
        0.        ,  1.        ]), Barcelona's greatest-ever youth side: array([0.32164583, 0.28499752, 1.        , 0.        , 4.        ,
       0.        , 7.      

In [1]:
from Utils import get_features

In [2]:
# Run the Utils helper on the Messi text and question (num_rel_sentences=3).
vecs = get_features(
    text='messi.txt',
    question='What disease was Messi diagnosed with?',
    num_rel_sentences=3,
)

114
