In [154]:
import stanza

# download_method=None tells stanza to skip its resource download step, so the
# models are not fetched again every time this cell is re-run.
# NOTE(review): this presumably requires the English models to already be
# available locally -- confirm on a fresh environment.
nlp = stanza.Pipeline(
    lang='en',
    processors='tokenize,pos,constituency',
    download_method=None,
)


2023-03-30 09:47:36 INFO: Loading these models for language: en (English):
| Processor    | Package  |
---------------------------
| tokenize     | combined |
| pos          | combined |
| constituency | wsj      |

2023-03-30 09:47:36 INFO: Using device: cpu
2023-03-30 09:47:36 INFO: Loading: tokenize
2023-03-30 09:47:36 INFO: Loading: pos
2023-03-30 09:47:37 INFO: Loading: constituency
2023-03-30 09:47:37 INFO: Done loading processors!


In [254]:
# Run the pipeline on a sample headline and print the constituency parse of
# every sentence it was split into.
doc = nlp('Syrian forces launch new attacks')
for sentence in doc.sentences:
    print(sentence.constituency)

(ROOT (S (NP (JJ Syrian) (NNS forces)) (VP (VBP launch) (NP (JJ new) (NNS attacks)))))


In [255]:
# Keep the constituency tree of the first sentence of `doc` for later cells.
tree = doc.sentences[0].constituency

In [285]:
def equals(a, b):
    """Comparator: return the result of ``a == b``.

    Intended to be passed as the ``comparator`` argument of ``find_pos`` to
    match a tree label against one exact label.
    """
    is_match = a == b
    return is_match

def starts_with(a, b):
    """Comparator: return whether ``a`` begins with the prefix ``b``.

    Delegates to ``a.startswith(b)``, so ``b`` may be anything that method
    accepts. Used with ``find_pos`` to match tag families such as ``'NN'``
    (which also covers ``'NNS'`` and ``'NNP'``).
    """
    has_prefix = a.startswith(b)
    return has_prefix

def is_in(a, b):
    """Comparator: return whether ``a`` is a member of the container ``b``.

    Used with ``find_pos`` to match a tree label against several accepted
    labels at once, e.g. ``['NP', 'VP']``.
    """
    is_member = a in b
    return is_member

def find_noun_phrase_verb_phrase(tree):
    """Find the first node that has both an NP and a VP as direct children.

    The tree is searched depth-first, parents before children and children
    from left to right.

    Args:
        tree: a constituency tree node exposing ``label`` and ``children``.

    Returns:
        A ``(noun_phrase, verb_phrase)`` tuple of child nodes taken from the
        first node that has both, or ``None`` when no node in the tree has an
        'NP' child and a 'VP' child at the same time. If a node has several
        NP (or VP) children, the last one is returned.
    """
    noun_phrase, verb_phrase = None, None
    for child in tree.children:
        if child.label == 'NP':
            noun_phrase = child
        elif child.label == 'VP':
            verb_phrase = child
    if noun_phrase is not None and verb_phrase is not None:
        return noun_phrase, verb_phrase

    # Search every subtree, not just the first one: returning the result of
    # the first child unconditionally would hide matches under later siblings.
    for child in tree.children:
        found = find_noun_phrase_verb_phrase(child)
        if found is not None:
            return found
    return None


def find_pos(tree, comparator, labels, find_first=False):
    """Search ``tree`` for a node whose label satisfies ``comparator``.

    Args:
        tree: node exposing ``label``, ``children`` and ``is_leaf()``.
        comparator: callable invoked as ``comparator(node.label, labels)``;
            a truthy result means the node matches.
        labels: value handed to ``comparator`` unchanged (a single label, a
            prefix, or a collection of labels, depending on the comparator).
        find_first: when true, return the first match in depth-first,
            left-to-right order. When false, each level keeps the match
            coming from its last matching child subtree.

    Returns:
        The matching node, or ``None`` when nothing matches. A node that
        matches is returned immediately without looking at its descendants,
        regardless of ``find_first``.
    """
    if comparator(tree.label, labels):
        return tree
    if tree.is_leaf():
        return None

    match = None
    for subtree in tree.children:
        found = find_pos(subtree, comparator, labels, find_first)
        if found is None:
            continue
        if find_first:
            return found
        # Later siblings overwrite earlier ones, so the last match wins.
        match = found
    return match

def trunk_construction(tree):
    """Extract the subject / predicate / object "trunk" of a parsed sentence.

    The heuristic works on the first 'NP' and the first 'VP' node found in
    depth-first order:

    * subject   -- first node inside that NP whose label starts with 'NN'
    * predicate -- last node inside that VP whose label starts with 'VB'
    * object    -- last node inside that VP whose label starts with 'NN'

    Args:
        tree: constituency tree node exposing ``label``, ``children`` and
            ``is_leaf()``.

    Returns:
        A ``(subject, predicate, object)`` tuple of tree nodes. An element is
        ``None`` when the corresponding part could not be found (for example
        when the sentence has no NP or no VP).
    """
    noun_phrase = find_pos(tree, equals, 'NP', True)
    verb_phrase = find_pos(tree, equals, 'VP', True)

    # Guard against sentences without an NP: find_pos cannot take None.
    subject = None
    if noun_phrase is not None:
        subject = find_pos(noun_phrase, starts_with, 'NN', find_first=True)

    predicate, obj = None, None
    if verb_phrase is not None:
        predicate = find_pos(verb_phrase, starts_with, 'VB')
        # With find_first=False, find_pos keeps the match from the last child
        # subtree that contains one, so a trailing child without any noun
        # (e.g. an adverb phrase) no longer wipes out an object found earlier.
        obj = find_pos(verb_phrase, starts_with, 'NN')

    return subject, predicate, obj


In [286]:
def test_parser(str, subject_str, predicate_str, object_str):
    """Parse a sentence, print its extracted trunk and check it.

    Only the first sentence of the parsed document is examined.

    Args:
        str: the sentence to parse (the name shadows the builtin, but it is
            kept because it is part of the function's signature).
        subject_str: expected word of the subject node.
        predicate_str: expected word of the predicate node.
        object_str: expected word of the object node.

    Raises:
        AssertionError: when a part is missing, has an unexpected tag family
            ('NN*' for subject/object, 'VB*' for predicate) or an unexpected
            word.
    """
    doc = nlp(str)
    tree = doc.sentences[0].constituency

    subject, predicate, obj = trunk_construction(tree)

    print(f"initial sentence: {str}")
    print(f"parse tree: {tree}")
    print(f"subject: {subject}")
    print(f"predicate: {predicate}")
    print(f"object: {obj}")
    print("---------")

    checks = (
        ("subject", subject, 'NN', subject_str),
        ("predicate", predicate, 'VB', predicate_str),
        ("object", obj, 'NN', object_str),
    )
    for role, node, tag_prefix, expected_word in checks:
        # Fail with a readable message instead of an AttributeError on None.
        assert node is not None, f"no {role} found in: {str!r}"
        assert node.label.startswith(tag_prefix), (
            f"{role} has tag {node.label!r}, expected a tag starting with {tag_prefix!r}"
        )
        # The word itself is the label of the tag node's only child.
        actual_word = node.children[0].label
        assert actual_word == expected_word, (
            f"{role} is {actual_word!r}, expected {expected_word!r}"
        )


In [287]:
# Regression cases: each call passes a sentence followed by the expected
# subject, predicate and object words.

# Simple active sentence.
test_parser('Syrian forces launch new attacks', 'forces', 'launch', 'attacks')
# Passive sentence: the expected subject is the grammatical subject ("tire")
# and the expected object is the agent ("driver").
test_parser("""the flat tire was replaced by the driver""",
             "tire", "replaced", "driver")
# Sentence with a relative clause and a quotation inside the verb phrase.
test_parser("""Amrozi accused his brother, whom he called "the witness", of deliberately distorting his evidence.""",
            'Amrozi', 'distorting', 'evidence')
# Subject noun phrase followed by a long appositive.
test_parser("""Shares of Genentech, a much larger company with several products on the market, rose more than 2 percent""",
            'Shares', 'rose', 'percent')
# Reported speech: the expected predicate/object come from the embedded clause.
test_parser("""Gyorgy Heizler, head of the local disaster unit, said the coach was carrying 38 passengers.""",
             'Gyorgy', 'carrying', 'passengers')



initial sentence: Syrian forces launch new attacks
parse tree: (ROOT (S (NP (JJ Syrian) (NNS forces)) (VP (VBP launch) (NP (JJ new) (NNS attacks)))))
subject: (NNS forces)
predicate: (VBP launch)
object: (NNS attacks)
---------
initial sentence: the flat tire was replaced by the driver
parse tree: (ROOT (S (NP (DT the) (JJ flat) (NN tire)) (VP (VBD was) (VP (VBN replaced) (PP (IN by) (NP (DT the) (NN driver)))))))
subject: (NN tire)
predicate: (VBN replaced)
object: (NN driver)
---------
initial sentence: Amrozi accused his brother, whom he called "the witness", of deliberately distorting his evidence.
parse tree: (ROOT (S (NP (NNP Amrozi)) (VP (VBD accused) (NP (NP (NP (PRP$ his) (NN brother)) (, ,) (SBAR (WHNP (WP whom)) (S (NP (PRP he)) (VP (VBD called) (S (NP (`` ") (NP (DT the) (NN witness))))))) ('' ")) (, ,) (PP (IN of) (S (ADVP (RB deliberately)) (VP (VBG distorting) (NP (PRP$ his) (NN evidence))))))) (. .)))
subject: (NNP Amrozi)
predicate: (VBG distorting)
object: (NN evidenc