In [None]:
! conda install -c conda-forge --yes spacy
! pip install allennlp
! python -m spacy download en
! python -m spacy download en_core_web_lg

In [None]:
import spacy
import pandas as pd

def is_verb(token):
    """Decide whether `token` should be treated as a verb heading its own phrase.

    A token qualifies when its part-of-speech tag is 'VERB', unless it is
    attached to its head in one of these ways:

    * as 'acl', 'amod' or 'prep' under a head tagged 'NOUN', or
    * as 'ccomp' or 'advcl' under a head tagged 'VERB'.

    Returns True or False.
    """
    if token.pos_ != 'VERB':
        return False

    # Each rule pairs a set of dependency labels with the head POS tag that,
    # together, disqualify the token.
    exclusion_rules = (
        (('acl', 'amod', 'prep'), 'NOUN'),
        (('ccomp', 'advcl'), 'VERB'),
    )
    return not any(
        token.dep_ in labels and token.head.pos_ == head_tag
        for labels, head_tag in exclusion_rules
    )

def get_head(doc):
    """Return the first token in `doc` that is its own head.

    Raises IndexError when no token satisfies `t.head == t` (for example,
    when `doc` is empty).
    """
    self_headed = list(filter(lambda tok: tok.head == tok, doc))
    return self_headed[0]

def get_verbs(doc):
    """Return, in document order, the tokens of `doc` accepted by `is_verb`."""
    return list(filter(is_verb, doc))

def get_descendents(token):
    """Collect `token` and everything below it, stopping at verbs.

    Children accepted by `is_verb` are skipped together with their subtrees.
    The result lists each remaining child's subtree first (in child order)
    and ends with `token` itself, so the order follows the tree traversal
    rather than necessarily the order of the words in the sentence.
    """
    collected = []
    for child in token.children:
        if is_verb(child):
            continue
        collected.extend(get_descendents(child))
    collected.append(token)
    return collected

def is_stop(t):
    """Tell whether token `t` should be dropped from an extracted phrase.

    A token is dropped when it is flagged as a stop word (`t.is_stop`), when
    its string form is a single character, or when its part-of-speech tag is
    'PUNCT' or 'PART'.
    """
    stop_flag = t.is_stop
    if stop_flag:
        return stop_flag
    if len(str(t)) == 1:
        return True
    return t.pos_ in ['PUNCT', 'PART']

def get_objs(v):
    """Return the children of `v` that are attached as objects or clauses.

    A child is kept when its dependency label is one of 'dobj', 'prep',
    'ccomp' or 'advcl'. The children keep their original order.
    """
    object_labels = ('dobj', 'prep', 'ccomp', 'advcl')
    return list(filter(lambda child: child.dep_ in object_labels, v.children))

def get_np(verb):
    """Build the verb text and object-phrase text for `verb`.

    The objects are taken from `get_objs(verb)`. When the verb has none but
    has children attached with the 'conj' label, those conjoined tokens are
    appended to the verb list and their objects are used instead.

    Returns a two-element list of strings: the space-joined verb tokens and
    the space-joined phrase tokens. Phrase tokens come from
    `get_descendents` of each object with `is_stop` tokens removed, so they
    appear in that traversal order; the phrase string is empty when nothing
    survives the filter.
    """
    verbs = [verb]
    objects = get_objs(verb)

    if not objects:
        # No objects of its own: fall back to the objects of conjoined verbs.
        conjuncts = [child for child in verb.children if child.dep_ == 'conj']
        if conjuncts:
            verbs.extend(conjuncts)
            objects = [obj for conj in conjuncts for obj in get_objs(conj)]

    phrase_tokens = []
    for obj in objects:
        phrase_tokens.extend(
            tok for tok in get_descendents(obj) if not is_stop(tok)
        )

    verb_text = ' '.join(str(tok) for tok in verbs)
    phrase_text = ' '.join(str(tok) for tok in phrase_tokens)
    return [verb_text, phrase_text]

def parse_sent(sent):
    """Extract (verb text, phrase text) pairs from a parsed sentence.

    Every verb found by `get_verbs` is passed through `get_np`. A pair is
    kept only when its phrase text is non-empty and has not already been
    produced by an earlier verb; pairs keep the order of the verbs.

    Returns a list of `(verb_text, phrase_text)` tuples.
    """
    pairs = []
    seen_phrases = set()
    for verb in get_verbs(sent):
        verb_text, phrase_text = get_np(verb)
        if not phrase_text or phrase_text in seen_phrases:
            continue
        seen_phrases.add(phrase_text)
        pairs.append((verb_text, phrase_text))
    return pairs

In [None]:
# Load the `en_core_web_lg` spaCy model and the tab-separated task file.
nlp = spacy.load('en_core_web_lg')
tasks = pd.read_csv('tasks.txt', sep='\t')

# Run the pipeline over the first 100 entries of the `Task` column only.
parsed = list(map(nlp, tasks['Task'][:100]))

In [486]:
# Hand-picked positions within `parsed` to inspect.
interesting = [0, 2, 4, 8, 9, 10, 22, 55, 61, 81]

# Last expression of the cell: the extracted (verb, phrase) pairs per sample.
[parse_sent(parsed[idx]) for idx in interesting]

[[('coordinate', 'an organization or budget financial activities'),
  ('fund', 'operations'),
  ('maximize', 'investments'),
  ('increase', 'efficiency')],
 [('Analyze',
   'operations or its staff company of objectives in performance or evaluate'),
  ('determine',
   'potential cost program or policy change improvement reduction of areas')],
 [('Prepare',
   'budgets approval for or programs of implementation funding for those including')],
 [('Establish', 'departmental responsibilities'),
  ('coordinate', 'and sites departments among functions')],
 [('Preside serve',
   'management or other governing boards committees directors of boards on')],
 [('Negotiate approve',
   'or agreements or state federal or other organizational entities agencies distributors suppliers with contracts')],
 [('coordinate', 'with involved businesses of activities'),
  ('buying selling', 'investment or financial services products')],
 [('Establish implement',
   'departmental or procedures objectives goals 

In [None]:
from spacy import displacy

# Draw the dependency ('dep' style) visualisation for the parsed task at
# index 2; `jupyter=True` asks displaCy to render into the notebook output.
displacy.render(parsed[2], style='dep', jupyter=True)

## ELMo Embedding

In [None]:
from allennlp.commands.elmo import ElmoEmbedder

# Create an ELMo embedder using the constructor's default arguments.
elmo = ElmoEmbedder()
# Embed the first parsed task, handing its tokens over as plain strings.
v = elmo.embed_sentence(list(map(str, parsed[0])))

In [None]:
# Show the dimensions of the embedding computed above.
# NOTE(review): presumably (layers, tokens, embedding size) — confirm against the AllenNLP docs.
v.shape

(3, 21, 1024)