## Assignment: Working with Dependency Graphs (Parses)

Define functions to:
- expract a path of dependency relations from the ROOT to a token
- extract subtree of a dependents given a token
- check if a given list of tokens (segment of a sentence) forms a subtree
- identify head of a span, given its tokens
- extract sentence subject, direct object and indirect object spans

In [1]:
import spacy
nlp = spacy.load("en_core_web_sm")
sentence='I saw the man with a telescope'

## extract a path of dependency relations from the ROOT to a token

In [2]:
def firstpoint(sentence):
    doc = nlp(sentence)
    token_path=[]
    path={}
    for t in doc:
        temp=t
        while not t.dep_=='ROOT': 
            token_path.insert(0,t.dep_)
            t=t.head  
        token_path.insert(0,'ROOT')
        path[temp]=token_path
        token_path=[]

    return path

d=firstpoint(sentence)

for tok in d:
    print('{}\t{}'.format(tok, d[tok]))

I	['ROOT', 'nsubj']
saw	['ROOT']
the	['ROOT', 'dobj', 'det']
man	['ROOT', 'dobj']
with	['ROOT', 'prep']
a	['ROOT', 'prep', 'pobj', 'det']
telescope	['ROOT', 'prep', 'pobj']


## extract subtree of a dependents given a token

In [3]:
def secondopunto(sentence):
    doc = nlp(sentence)
    result={}
    path=[]
    for t in doc: 
        for token in t.subtree:
            path.append(token.text) #appending token.text and not token because it will simplify the search
            
        
        result[t]=path
        path=[]

    return result


d=secondopunto(sentence)

for tok in d:
    print('{}\t{}'.format(tok, d[tok]))


I	['I']
saw	['I', 'saw', 'the', 'man', 'with', 'a', 'telescope']
the	['the']
man	['the', 'man']
with	['with', 'a', 'telescope']
a	['a']
telescope	['a', 'telescope']


## check if a given list of tokens (segment of a sentence) forms a subtree

In [4]:
def terzopunto_b(sentence, sub):
    d=secondopunto(sentence)
    check=False
    for key,values in d.items():
        if (sub==values):
            check=True
            
    return check
terzopunto_b(sentence, ['a','telescope'])


True

## identify head of a span, given its tokens

In [5]:
def quartopunto(sentence,start=0, end=len(sentence)):
    doc = nlp(sentence)
    span= doc[0:len(sentence)]
    return span.root
    

    
s_span=quartopunto(sentence,3,5)

n_span=quartopunto(sentence)
print ('con star/end: ', s_span)
print('no start/end:' , n_span)

con star/end:  saw
no start/end: saw


## extract sentence subject, direct object and indirect object spans

In [6]:
def quintopunto(sentence):
    doc = nlp(sentence)
    #search = ['nsubj', 'dobj', 'iobj']
    
    res={ 'nsubj': [] , 'dobj': [] , 'iobj':[] } 
    
    for key in res.keys(): 
        for t in doc: 
            if (t.dep_==key):
                for t in t.subtree:
                    res[key].append(t)
    return res

quintopunto(sentence)

{'nsubj': [I], 'dobj': [the, man], 'iobj': []}