### Use of a dictionary-based stemmer such as `Hunspell`

In [2]:
from spacy.en import English
from nltk.stem.wordnet import WordNetLemmatizer

In [3]:
# Create the WordNet lemmatizer instance used in the demonstration cell below.
lemmat = WordNetLemmatizer()

In [4]:
# Lemmatize the plural form 'times'; the recorded cell output is 'time'.
lemmat.lemmatize('times')

'time'

### Judging by the example above, the lemmatizer alone is not enough for our task.
We should avoid lemmatization.

[Universal Dependencies](http://universaldependencies.org/en/dep/)


In [5]:
# Dependency labels (``dep_`` values) that the helpers below accept as marking
# a subject.
SUBJECTS = ["nsubj", "nsubjpass","csubj", "csubjpass", "agent", "expl"]
# Dependency labels (``dep_`` values) that the helpers below accept as marking
# an object.
OBJECTS = ["dobj", "dative", "attr", "oprd"]

In [6]:
def isNegated(tok):
    """Report whether a token has a negation word among its direct children.

    Args:
        tok: Token-like object exposing ``lefts`` and ``rights`` iterables
            whose items carry a lower-cased text attribute ``lower_``.

    Returns:
        True if any left or right child is one of "no", "not", "n't",
        "never" or "none"; False otherwise.
    """
    negationWords = {"no", "not", "n't", "never", "none"}
    children = list(tok.lefts) + list(tok.rights)
    return any(child.lower_ in negationWords for child in children)

In [7]:
def getSubsFromConjunctions(subs):
    """Collect additional subjects joined to the given ones by "and".

    For each subject, its right-hand children are inspected. When one of them
    is the word "and", every right child that carries a label from SUBJECTS
    or is tagged as a noun is treated as a coordinated subject. The search
    then continues from the newly found subjects so that chains such as
    "A and B and C" are followed.

    Args:
        subs: Iterable of token-like objects exposing ``rights``; the children
            must expose ``lower_``, ``dep_`` and ``pos_``.

    Returns:
        A list (possibly empty) of the extra subject tokens in discovery
        order. The tokens in ``subs`` themselves are not included.
    """
    moreSubs = []
    for sub in subs:
        # ``rights`` is a generator, so materialise it before scanning twice.
        rights = list(sub.rights)
        if "and" not in {tok.lower_ for tok in rights}:
            continue
        found = [tok for tok in rights
                 if tok.dep_ in SUBJECTS or tok.pos_ == "NOUN"]
        if found:
            moreSubs.extend(found)
            # Recurse only on what this subject contributed; recursing on the
            # whole accumulator would re-visit earlier finds and duplicate
            # their conjuncts.
            moreSubs.extend(getSubsFromConjunctions(found))
    # Return only after every subject has been examined, and always return a
    # list (even for empty input) so callers can safely ``extend`` with it.
    return moreSubs

In [8]:
def getObjsFromConjunctions(objs):
    """Collect additional objects joined to the given ones by "and".

    For each object, its right-hand children are inspected. When one of them
    is the word "and", every right child that carries a label from OBJECTS or
    is tagged as a noun is treated as a coordinated object. The search then
    continues from the newly found objects to follow longer chains.

    Args:
        objs: Iterable of token-like objects exposing ``rights``; the children
            must expose ``lower_``, ``dep_`` and ``pos_``.

    Returns:
        A list (possibly empty) of the extra object tokens in discovery order,
        each appearing once. The tokens in ``objs`` are not included.
    """
    moreObjs = []
    for obj in objs:
        rights = list(obj.rights)
        if "and" not in {tok.lower_ for tok in rights}:
            continue
        found = [tok for tok in rights
                 if tok.dep_ in OBJECTS or tok.pos_ == "NOUN"]
        if found:
            moreObjs.extend(found)
            # Recurse only on the tokens found for this object. Recursing on
            # the accumulated list would walk earlier finds again and append
            # their conjuncts a second time.
            moreObjs.extend(getObjsFromConjunctions(found))
    return moreObjs

In [9]:
def getVerbFromConjunctions(verbs):
    """Collect additional verbs joined to the given ones by "and".

    For each verb, its right-hand children are inspected. When one of them is
    the word "and", every right child tagged as a verb is treated as a
    coordinated verb. The search then continues from the newly found verbs to
    follow longer chains.

    Args:
        verbs: Iterable of token-like objects exposing ``rights``; the
            children must expose ``lower_`` and ``pos_``.

    Returns:
        A list (possibly empty) of the extra verb tokens in discovery order,
        each appearing once. The tokens in ``verbs`` are not included.
    """
    moreVerbs = []
    for verb in verbs:
        # Materialise the children once instead of re-reading ``rights``.
        rights = list(verb.rights)
        if "and" not in {tok.lower_ for tok in rights}:
            continue
        found = [tok for tok in rights if tok.pos_ == "VERB"]
        if found:
            moreVerbs.extend(found)
            # Recurse only on this verb's finds so that conjuncts of verbs
            # discovered earlier are not appended a second time.
            moreVerbs.extend(getVerbFromConjunctions(found))
    return moreVerbs

In [10]:
def findSVs(tokens):
    """Extract (subject, verb) text pairs from a sequence of tokens.

    Every token tagged as a verb is paired with each subject that
    ``getAllSubs`` reports for it. When the verb is reported as negated, its
    text is prefixed with "!".

    Args:
        tokens: Iterable of token-like objects exposing ``pos_`` and ``orth_``.

    Returns:
        A list of ``(subject_text, verb_text)`` tuples in token order.
    """
    pairs = []
    for verb in tokens:
        if verb.pos_ != "VERB":
            continue
        subjects, negated = getAllSubs(verb)
        verbText = "!" + verb.orth_ if negated else verb.orth_
        pairs.extend((subject.orth_, verbText) for subject in subjects)
    return pairs

In [11]:
def getObjsFromPrepositions(deps):
    """Collect objects hanging off prepositions among the given tokens.

    A token counts as a preposition when it is tagged ``ADP`` with dependency
    label ``prep``. From each such token's right children, those whose label
    is in OBJECTS are kept, as is the pronoun "me".

    Args:
        deps: Iterable of token-like objects exposing ``pos_``, ``dep_`` and
            ``rights``; the children must also expose ``lower_``.

    Returns:
        A list (possibly empty) of the matching child tokens.
    """
    objs = []
    for dep in deps:
        isPreposition = dep.pos_ == "ADP" and dep.dep_ == "prep"
        if not isPreposition:
            continue
        for tok in dep.rights:
            if tok.dep_ in OBJECTS:
                objs.append(tok)
            elif tok.pos_ == "PRON" and tok.lower_ == "me":
                objs.append(tok)
    return objs

In [12]:
def getObjsFromAttrs(deps):
    """Find a verb below an attribute noun together with that verb's objects.

    Scans ``deps`` for nouns labelled ``attr``. For each verb among such a
    noun's right children, the verb's right children labelled with an OBJECTS
    label are gathered, plus whatever ``getObjsFromPrepositions`` returns for
    them. The first verb that yields at least one object wins.

    Args:
        deps: Iterable of token-like objects exposing ``pos_``, ``dep_`` and
            ``rights``.

    Returns:
        ``(verb, objects)`` for the first verb with a non-empty object list,
        or ``(None, None)`` when there is none.
    """
    for dep in deps:
        if not (dep.pos_ == "NOUN" and dep.dep_ == "attr"):
            continue
        for verb in dep.rights:
            if verb.pos_ != "VERB":
                continue
            verbRights = list(verb.rights)
            found = [tok for tok in verbRights if tok.dep_ in OBJECTS]
            found += getObjsFromPrepositions(verbRights)
            if found:
                return verb, found
    return None, None

In [13]:
def getObjFromXComp(deps):
    """Find an open-clausal-complement verb together with its objects.

    Scans ``deps`` for verbs labelled ``xcomp``. For each one, its right
    children labelled with an OBJECTS label are gathered, plus whatever
    ``getObjsFromPrepositions`` returns for those children. The first verb
    that yields at least one object wins.

    Args:
        deps: Iterable of token-like objects exposing ``pos_``, ``dep_`` and
            ``rights``.

    Returns:
        ``(verb, objects)`` for the first ``xcomp`` verb with a non-empty
        object list, or ``(None, None)`` when there is none.
    """
    for candidate in deps:
        if candidate.pos_ != "VERB" or candidate.dep_ != "xcomp":
            continue
        children = list(candidate.rights)
        found = [tok for tok in children if tok.dep_ in OBJECTS]
        found += getObjsFromPrepositions(children)
        if found:
            return candidate, found
    return None, None

In [14]:
def findSubs(tok):
    """Find subjects for a token by climbing towards the root of the parse.

    Starting from the token's head, the tree is climbed until a verb, a noun,
    or a token that is its own head is reached.

    * If a verb is reached, its left children carrying a SUBJECTS label are
      taken as subjects (plus any joined to them by "and"). If that verb has
      no subject of its own and is not its own head, the search continues
      from that verb, so a subject-less verb inherits the subject of a verb
      higher up.
    * If a noun is reached, that noun is the subject.

    Args:
        tok: Token-like object exposing ``head``; ancestors must expose
            ``pos_``, ``head`` and ``lefts``.

    Returns:
        ``(subjects, negated)`` where ``subjects`` is a list of tokens
        (empty when nothing was found) and ``negated`` is the ``isNegated``
        result for the verb that owns the subjects, or for ``tok`` itself
        when a noun head was used; ``([], False)`` when nothing is found.
    """
    head = tok.head
    # Stop at the first verb/noun ancestor, or at a token that is its own head
    # (NOTE(review): presumably the parse root -- confirm against spaCy docs).
    while head.pos_ not in ("VERB", "NOUN") and head.head != head:
        head = head.head
    if head.pos_ == "VERB":
        # Match against the SUBJECTS labels; the previous literal "SUB" is not
        # one of them, so this branch could never find a subject.
        subs = [child for child in head.lefts if child.dep_ in SUBJECTS]
        if subs:
            verbNegated = isNegated(head)
            subs.extend(getSubsFromConjunctions(subs))
            return subs, verbNegated
        # This verb has no subject of its own: keep climbing unless it is its
        # own head. (The former ``elif head.pos_ == "NOUN"`` here was
        # unreachable inside the VERB branch.)
        if head.head != head:
            return findSubs(head)
    elif head.pos_ == "NOUN":
        return [head], isNegated(tok)
    return [], False

In [15]:
def getAllSubs(v):
    """Return every subject of a verb and whether the verb is negated.

    Direct subjects are the verb's left children that carry a SUBJECTS label
    and are not determiners; subjects joined to them by "and" are appended.
    When the verb has no direct subject, ``findSubs`` is used instead and its
    negation flag replaces the verb's own.

    Args:
        v: Verb token-like object exposing ``lefts``.

    Returns:
        ``(subjects, negated)``: a list of subject tokens (possibly empty) and
        a boolean negation flag.
    """
    negated = isNegated(v)
    subjects = [child for child in v.lefts
                if child.dep_ in SUBJECTS and child.pos_ != "DET"]
    if subjects:
        subjects += getSubsFromConjunctions(subjects)
        return subjects, negated
    # No direct subject: fall back to searching up the tree.
    inherited, negated = findSubs(v)
    return list(inherited), negated


def getAllObjs(v):
    """Return the effective verb and every object attached to it.

    Objects are the verb's right children carrying an OBJECTS label, plus
    those reported by ``getObjsFromPrepositions``. If ``getObjFromXComp``
    finds a complement verb with objects, those objects are added and that
    verb replaces ``v`` in the result. Objects joined by "and" are appended
    last.

    Args:
        v: Verb token-like object exposing ``rights``.

    Returns:
        ``(verb, objects)``: the verb to report (``v`` or the complement verb)
        and a list of object tokens (possibly empty).
    """
    # ``rights`` is a generator; keep one materialised copy for all scans.
    children = list(v.rights)
    objs = [tok for tok in children if tok.dep_ in OBJECTS]
    objs += getObjsFromPrepositions(children)

    xcompVerb, xcompObjs = getObjFromXComp(children)
    if xcompVerb is not None and xcompObjs:
        objs += xcompObjs
        v = xcompVerb
    if objs:
        objs += getObjsFromConjunctions(objs)
    return v, objs

In [16]:
def findSVOs(tokens):
    """Extract (subject, verb, object) text triples from a sequence of tokens.

    Every non-auxiliary verb is examined. Verbs without any subject are
    skipped. For the rest, one triple is produced per subject/object
    combination, using lower-cased token text. The verb text is prefixed with
    "!" when either the verb or the object is negated.

    Args:
        tokens: Iterable of token-like objects exposing ``pos_``, ``dep_``
            and ``lower_``.

    Returns:
        A list of ``(subject, verb, object)`` string tuples.
    """
    svos = []
    verbs = [tok for tok in tokens if tok.pos_ == "VERB" and tok.dep_ != "aux"]

    for v in verbs:
        subs, verbNegated = getAllSubs(v)
        # If there are no subjects, do not examine the verb any further.
        if not subs:
            continue
        # getAllObjs may swap in a complement verb that owns the objects.
        v, objs = getAllObjs(v)
        for sub in subs:
            for obj in objs:
                negated = verbNegated or isNegated(obj)
                # Build the marker by concatenation. The earlier
                # ``"!", + v.lower_`` had a stray comma: it applied unary
                # plus to a string (TypeError) when negated and otherwise
                # produced a 4-tuple containing a literal "!".
                verbText = "!" + v.lower_ if negated else v.lower_
                svos.append((sub.lower_, verbText, obj.lower_))
    return svos

In [17]:
def printDeps(toks):
    """Print one line of dependency information per token.

    Each line shows the token text, its dependency label, its part-of-speech
    tag, the text of its head, and the texts of its left and right children.

    Args:
        toks: Iterable of token-like objects exposing ``orth_``, ``dep_``,
            ``pos_``, ``head``, ``lefts`` and ``rights``.
    """
    for tok in toks:
        leftTexts = [child.orth_ for child in tok.lefts]
        rightTexts = [child.orth_ for child in tok.rights]
        print(tok.orth_, tok.dep_, tok.pos_, tok.head.orth_, leftTexts, rightTexts)

In [18]:
def testSVOs(sent):
    nlp = English()
    tok = nlp(sent)
    svos = findSVOs(tok)
    printDeps(tok)
    print(svos)

In [19]:
def main(sent):
    """Run the SVO demonstration on ``sent`` by delegating to ``testSVOs``."""
    testSVOs(sent)

In [19]:
# Sample word problem used to exercise the SVO extraction end to end.
sent = "For Halloween Debby and her sister combined the candy they received. Debby had 32 pieces of candy while her sister had 42. If they ate 35 pieces the first night, how many pieces do they have left?"
# Parse the sample, then print its dependency table and extracted triples.
testSVOs(sent)

KeyboardInterrupt: 