In [1]:
from nltk.parse import corenlp
from nltk.tree import Tree

In [2]:
# Remember to start Stanford CoreNLP java server separately.
parser = corenlp.CoreNLPParser(url='http://localhost:9000')

### Key utility functions

In [3]:
def get_parsetree(parser, sent):
    """
    Return parse tree from sentence.

    :param parser: an nltk.parse.corenlp.CoreNLPParser
    :returns: nltk.tree.Tree
    """
    parsed = parser.raw_parse(sent)
    collected = []
    for i in parsed:
        collected.append(i)
    parsetree = collected[0]
    return parsetree

In [4]:
def get_child_labels(t):
    """
    :param t: an nltk.tree.Tree
    :returns: list of labels of the children.
    """
    labels = []
    for child in t:
        labels.append(child.label())
        
    return labels

In [34]:
def test(parser, func, text, pprint=False, expected=None):
    print('input:'), 
    print(text, '\n')
    parsetreex = get_parsetree(parser, text)
    if pprint == True:
        parsetreex.pretty_print()
    
    result = func(parsetreex[0])
    print('output:') 
    print(result)
    print()
    if expected:
        print('expected:')
        print(expected, '\n')
        if result == expected:
            print('result: PASSING\n')
        else:
            print('result: FAILING\n')
            parsetreex.pretty_print()
    print('\n')

### Functions for `simple_find_parallel_sentence8`

In [35]:
def find_sub(t):
    """
    :param t: an nltk.tree.Tree
    :returns: list. a list representing parallel subcomponents.
    """
    commons = []
    subcomponents = []
    
    if t.height() > 2 and 'CC' in get_child_labels(t):
        # when height == 2, we only have leaves (string type) left in the tree.
        for child in t:
            if child.label() != 'CC':
                subcomponents.append(child.leaves())
    else:
        for child in t:
            if type(child) == Tree:
                results = find_sub(child)
                commons.extend(results[0])
                subcomponents.extend(results[1])
                #print(subcomponents)
            else:
                # a leaf (a string type)
                commons.append(child)
                
    return commons, subcomponents

In [36]:
def simple_find_parallel_sentence8(t):
    """
    From a sentence, finds, generates and prints parallel sub-sentences describing the same subject, if they exist.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and "You may become impatient with those who don't learn as quickly."
    
    Looks for parallel noun phrases (NP, NN), parallel verbs (VP, V) and parallel adjectives (ADJP, JJ).
    Parallel phrases means these phrases are children of the same node.
    Also takes parallel sentences (S) into account.
    Also takes conditions ("if" or SBAR) into account.
    
    This iteration attempts to use a recursive approach to find the parallel parts once it finds the first 
    VP co-occuring (parallel) with an NP.
    
    :param t: an nltk.tree.Tree
    :returns: list of simpler sentences.
    """
    preconditions = []  # conditions. e.g. "if you are x".
    qualifiers = []  # phrases that qualify the parallel VPs.
    subsentences = []
    final_sents = []
    
    labels = get_child_labels(t)
    
    for child in t:
        if child.label() == 'SBAR':
            #print('precondition found.')
            preconditions.extend(child.leaves())
    
    if labels.count('S') >= 2:
        #print(">=2 'S' found on same level.")
        for child in t:
            if child.label() == 'S':
                subsentences.append(child.leaves())
        #print('new sentences are :')
        for sent in subsentences:
            final_sent_arr = []
            final_sent_arr.extend(preconditions)
            #final_sent_arr.extend(t[np_label].leaves()) # skipped
            final_sent_arr.extend(qualifiers)
            final_sent_arr.extend(sent)
            final_sent = " ".join(final_sent_arr) + '.'
            final_sents.append(final_sent)
        return final_sents
    
    elif 'NP' in labels:
        np_label = labels.index('NP')
        #print('NP found.')
        
        if 'VP' in labels:
            #print('NP and VP found on same level (under same node).')
            vp_index = labels.index('VP')
            labels2 = get_child_labels(t[vp_index])
            
            # if a child of the first VP is not VP, NP or ADJP, it is likely some qualifier (e.g. a VBP)?  
            #for child in t[vp_index]:
            #    if child.label() not in ['VP', 'NP', 'ADJP', 'S']:
            #        qualifiers.append(child.leaves())
            
            # check if 'CC' exists; todo: refactor?.
            leaves_pos = t[vp_index].pos()
            cc_exists = False
            for pair in leaves_pos:
                if pair[1] in ['CC']:
                    cc_exists = True
                    break
                else: 
                    cc_exists == False
            
            # recurse until 'CC' child is found or leaf is reached, then return and accumulate any parallel parts found.  
            if cc_exists:
                results = find_sub(t[vp_index]) # recursive step.
                qualifiers.extend(results[0])
                subsentences.extend(results[1])

                #print('new sentences are :')
                for sent in subsentences:
                    final_sent_arr = []
                    final_sent_arr.extend(preconditions)
                    final_sent_arr.extend(t[np_label].leaves())
                    final_sent_arr.extend(qualifiers)
                    final_sent_arr.extend(sent)
                    final_sent = " ".join(final_sent_arr) + '.'
                    final_sents.append(final_sent)
                    #print(final_sent)
                #print()
                return final_sents
            else:
                Exception('Sentence not decomposable.')
        
    else:
        raise Exception('Sentence structure not covered by function.')

### Test `simple_find_parallel_sentence_8` with standard test cases.

In [37]:
'''
TEST CASES - simple
'''

# parallel NN
text = "Bobby is a good father and great friend."
expected = [
    'Bobby is a good father.',
    'Bobby is a great friend.'
]
test(parser, simple_find_parallel_sentence8, text, expected=expected)

# parallel "negative" NN 
text = "Bobby is not a good father and great friend."
expected = [
    'Bobby is not a good father.',
    'Bobby is not a great friend.'
]
test(parser, simple_find_parallel_sentence8, text, expected=expected)

# parallel ADJP -> PASS
text = "Bobby is very handsome and super smart"
expected = [
    'Bobby is very handsome.',
    'Bobby is super smart.'
]
test(parser, simple_find_parallel_sentence8, text, expected=expected)

# parallel ADJP AND JJ 
text = "Bobby is very handsome and smart"
expected = [
    'Bobby is very handsome.',
    'Bobby is smart.'
]
test(parser, simple_find_parallel_sentence8, text, expected=expected)

# parallel VP 
text = "You are quick to grasp new concepts and equally quick to lose interest in an idea or project once your curiosity has been satisfied."
expected = ['You are quick to grasp new concepts.',
            'You are equally quick to lose interest in an idea or project once your curiosity has been satisfied.'
]
test(parser, simple_find_parallel_sentence8, text, expected=expected)


'''
MORE TEST CASES - with conditional statements.
'''

# conditional and 2 adjectives
text = "If you have aries in rising, you are loud and superficial."
expected = [
    'If you have aries in rising, you are loud.',
    'If you have aries in rising, you are superficial.'
]
test(parser, simple_find_parallel_sentence8, text, expected=expected)

# conditional and 2 "nested" adjectives
text = "If you have aries in rising, you tend to be loud and superficial"
expected = [
    'If you have aries in rising, you tend to be loud.',
    'If you have aries in rising, you tend to be superficial.'
]
test(parser, simple_find_parallel_sentence8, text, expected=expected)

input:
Bobby is a good father and great friend. 

output:
['Bobby is a good father.', 'Bobby is great friend.']

expected:
['Bobby is a good father.', 'Bobby is a great friend.'] 

result: FAILING

                    ROOT                          
                     |                             
                     S                            
   __________________|__________________________   
  |        VP                                   | 
  |     ___|_________                           |  
  |    |             NP                         | 
  |    |        _____|_______________           |  
  NP   |       NP          |         NP         | 
  |    |    ___|_____      |     ____|____      |  
 NNP  VBZ  DT  JJ    NN    CC   JJ        NN    . 
  |    |   |   |     |     |    |         |     |  
Bobby  is  a  good father and great     friend  . 



input:
Bobby is not a good father and great friend. 

output:
['Bobby is not a good father.', 'Bobby is not great friend.']

expec

In [38]:
'''
MORE TEST CASES (2)
'''

text = "You strive for perfection and can be quite the person to live with or to be around because your standards for yourself and others are so high."
expected = [
    'You strive for perfection.',
    'You can be quite the person to live with or to be around because your standards for yourself and others are so high.'
]
test(parser, simple_find_parallel_sentence8, text, expected=expected)

# conditional and 2 "nested" adjectives
text = "Your senses and emotions are quite strong and it would be wise for you to listen to your intuitive side as you can be quite psychic."
expected = [
    'Your senses and emotions are quite strong.',
    'it would be wise for you to listen to your intuitive side as you can be quite psychic.'
]
test(parser, simple_find_parallel_sentence8, text, expected=expected)

input:
You strive for perfection and can be quite the person to live with or to be around because your standards for yourself and others are so high. 

output:
['You strive for perfection.', 'You can be quite the person to live with or to be around because your standards for yourself and others are so high.']

expected:
['You strive for perfection.', 'You can be quite the person to live with or to be around because your standards for yourself and others are so high.'] 

result: PASSING



input:
Your senses and emotions are quite strong and it would be wise for you to listen to your intuitive side as you can be quite psychic. 

output:
['Your senses and emotions are quite strong.', 'it would be wise for you to listen to your intuitive side as you can be quite psychic.']

expected:
['Your senses and emotions are quite strong.', 'it would be wise for you to listen to your intuitive side as you can be quite psychic.'] 

result: PASSING





In [39]:
'''
MORE TEST CASES (3)
'''

# condition appearing after adjective phrase.
text = "You are graceful and poised because you are calm and confident."
expected = [
    'You are graceful because you are calm and confident.',
    'You are poised because you are calm and confident.'
]
test(parser, simple_find_parallel_sentence8, text, expected=expected)


input:
You are graceful and poised because you are calm and confident. 

output:
['You are because you are graceful.', 'You are because you are poised.', 'You are because you are calm.', 'You are because you are confident.']

expected:
['You are graceful because you are calm and confident.', 'You are poised because you are calm and confident.'] 

result: FAILING

                               ROOT                                      
                                |                                         
                                S                                        
  ______________________________|______________________________________   
 |                              VP                                     | 
 |    __________________________|_________                             |  
 |   |            |                      SBAR                          | 
 |   |            |              _________|____                        |  
 |   |            |             |     

In [40]:
'''
MORE TEST CASES (4) - from the example astrology dataset.
'''

# 
text = "Comfort is important to you and you hate getting your hands dirty."
test(parser, simple_find_parallel_sentence8, text)

text = "Debate and argument appeal to you."
test(parser, simple_find_parallel_sentence8, text)

text = "Guard against possessiveness, jealousy, and taking the easy way out in your relationships."
test(parser, simple_find_parallel_sentence8, text)

text = "People with Virgo rising tend to be practical, analytical, discriminating, fastidious, careful, exacting, attentive to details, methodical, quiet, unassuming, shy, critical, thoughtful, and somewhat self-centered."
test(parser, simple_find_parallel_sentence8, text)

text = "You aim to please in practical and earthy ways."
test(parser, simple_find_parallel_sentence8, text)


input:
Comfort is important to you and you hate getting your hands dirty. 

output:
['Comfort is important to you.', 'you hate getting your hands dirty.']



input:
Debate and argument appeal to you. 

output:
None



input:
Guard against possessiveness, jealousy, and taking the easy way out in your relationships. 

output:
None



input:
People with Virgo rising tend to be practical, analytical, discriminating, fastidious, careful, exacting, attentive to details, methodical, quiet, unassuming, shy, critical, thoughtful, and somewhat self-centered. 

output:
['People with Virgo rising tend to be self-centered practical.', 'People with Virgo rising tend to be self-centered ,.', 'People with Virgo rising tend to be self-centered analytical.', 'People with Virgo rising tend to be self-centered ,.', 'People with Virgo rising tend to be self-centered discriminating.', 'People with Virgo rising tend to be self-centered ,.', 'People with Virgo rising tend to be self-centered fastidious.', 'Pe

### Compare `simple_find_parallel_sentence7` with `simple_find_parallel_sentence8`

In [41]:
def simple_find_parallel_sentence7(t):
    """
    From a sentence, finds the earliest parallel sub-sentences describing the same subject, if it exists.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and "You may become impatient with those who don't learn as quickly."
    
    Takes into account parallel noun phrases (NP, NN), parallel verbs (VP, V) and parallel adjectives (ADJP, JJ).
    Also takes parallel sentences (S) into account .
    Also takes conditions ("if" or SBAR) into account.
    
    :param t: an nltk.tree.Tree
    :returns: 2-ple of nltk.tree.Tree
    """
    preconditions = []  # conditions. e.g. "if you are x".
    qualifiers = []  # phrases that qualify the subsentences.
    subsentences = []
    
    labels = get_child_labels(t)
    
    final_sents = []
    for child in t:
        if child.label() == 'SBAR':
            #print('precondition found.')
            preconditions.extend(child.leaves())
    
    if labels.count('S') >= 2:
        #print(">=2 'S' found on same level.")
        for child in t:
            if child.label() == 'S':
                subsentences.append(child.leaves())
        #print('new sentences are :')
        for sent in subsentences:
            #print(preconditions, sent)
            final_sent_arr = []
            final_sent_arr.extend(preconditions)
            #final_sent_arr.extend(t[np_label].leaves())  # skipped.
            final_sent_arr.extend(qualifiers)
            final_sent_arr.extend(sent)
            final_sent = " ".join(final_sent_arr) + '.'
            final_sents.append(final_sent)
        return final_sents
    
    elif 'NP' in labels:
        np_label = labels.index('NP')
        #print('NP found.')
        if 'VP' in labels:
            #print('NP and VP found on same level.')
            vp_index = labels.index('VP')
            labels2 = get_child_labels(t[vp_index])
            
            # if a child of the first VP is not VP, NP or ADJP, it is likely some qualifier?  
            for child in t[vp_index]:
                if child.label() not in ['VP', 'NP', 'ADJP']:
                    qualifiers.extend(child.leaves())
            
            if labels2.count('VP') >= 2:
                #print('found parallel VP.')
                for child in t[vp_index]:
                    if child.label() == 'VP':
                        subsentences.append(child.leaves())
                    
            elif labels2.count('ADJP') == 1: 
                #print('found ADJP under VP.')
                adjp_index = labels2.index('ADJP')
                labels3 = get_child_labels(t[vp_index][adjp_index])
                if labels3.count('ADJP') + labels3.count('JJ') >= 2:
                    #print('found parallel ADJP')
                    for child in t[vp_index][adjp_index]:
                        if child.label() == 'ADJP' or child.label() == 'JJ':
                            subsentences.append(child.leaves())

            elif labels2.count('NP') == 1:
                #print('found NP under VP.')
                np_index = labels2.index('NP')
                labels3 = get_child_labels(t[vp_index][np_index])
                if labels3.count('NP') + labels3.count('NN') >= 2:
                    #print('found parallel NP')
                    for child in t[vp_index][np_index]:
                        if child.label() == 'NP' or child.label() == 'NN':
                            subsentences.append(child.leaves())

        elif 'ADJP' in labels:
            index = labels.index('ADJP')
            labels2 = get_child_labels(t[index])
            if labels2.count('ADJP') >= 2:
                #print('found parallel ADJP.')
                for child in t[index]:
                    if child.label() == 'ADJP':
                        subsentences.append(child.leaves())
        
        #print('new sentences are :')
        for sent in subsentences:
            #print(preconditions, t[np_label].leaves(), qualifiers, sent)
            final_sent_arr = []
            final_sent_arr.extend(preconditions)
            final_sent_arr.extend(t[np_label].leaves())
            final_sent_arr.extend(qualifiers)
            final_sent_arr.extend(sent)
            final_sent = " ".join(final_sent_arr) + '.'
            final_sents.append(final_sent)
        return final_sents
        #print()
            
        # todo: return n new trees representing the n new sentences or return n new string sentences.
        # todo: Rewrite code to fit more general cases. Right now, the logic is very 'hard-coded'.
        # todo: consider the possibility of a recursive algo.
        
    else:
        raise exception('Sentence structure not covered by function.')

### Standard test cases applied to `simple_find_parallel_sentence7`

In [42]:
'''
TEST CASES - simple
'''

# parallel NN
text = "Bobby is a good father and great friend."
expected = [
    'Bobby is a good father.',
    'Bobby is a great friend.'
]
test(parser, simple_find_parallel_sentence7, text, expected=expected)

# parallel "negative" NN 
text = "Bobby is not a good father and great friend."
expected = [
    'Bobby is not a good father.',
    'Bobby is not a great friend.'
]
test(parser, simple_find_parallel_sentence7, text, expected=expected)

# parallel ADJP -> PASS
text = "Bobby is very handsome and super smart"
expected = [
    'Bobby is very handsome.',
    'Bobby is super smart.'
]
test(parser, simple_find_parallel_sentence7, text, expected=expected)

# parallel ADJP AND JJ 
text = "Bobby is very handsome and smart"
expected = [
    'Bobby is very handsome.',
    'Bobby is smart.'
]
test(parser, simple_find_parallel_sentence7, text, expected=expected)

# parallel VP 
text = "You are quick to grasp new concepts and equally quick to lose interest in an idea or project once your curiosity has been satisfied."
expected = ['You are quick to grasp new concepts.',
            'You are equally quick to lose interest in an idea or project once your curiosity has been satisfied.'
]
test(parser, simple_find_parallel_sentence7, text, expected=expected)


'''
MORE TEST CASES - with conditional statements.
'''

# conditional and 2 adjectives
text = "If you have aries in rising, you are loud and superficial."
expected = [
    'If you have aries in rising, you are loud.',
    'If you have aries in rising, you are superficial.'
]
test(parser, simple_find_parallel_sentence7, text, expected=expected)

# conditional and 2 "nested" adjectives
text = "If you have aries in rising, you tend to be loud and superficial"
expected = [
    'If you have aries in rising, you tend to be loud.',
    'If you have aries in rising, you tend to be superficial.'
]
test(parser, simple_find_parallel_sentence7, text, expected=expected)

input:
Bobby is a good father and great friend. 

output:
['Bobby is a good father.', 'Bobby is great friend.']

expected:
['Bobby is a good father.', 'Bobby is a great friend.'] 

result: FAILING

                    ROOT                          
                     |                             
                     S                            
   __________________|__________________________   
  |        VP                                   | 
  |     ___|_________                           |  
  |    |             NP                         | 
  |    |        _____|_______________           |  
  NP   |       NP          |         NP         | 
  |    |    ___|_____      |     ____|____      |  
 NNP  VBZ  DT  JJ    NN    CC   JJ        NN    . 
  |    |   |   |     |     |    |         |     |  
Bobby  is  a  good father and great     friend  . 



input:
Bobby is not a good father and great friend. 

output:
['Bobby is not a good father.', 'Bobby is not great friend.']

expec

In [43]:
'''
MORE TEST CASES (3)
'''

# condition appearing after adjective phrase.
text = "You are graceful and poised because you are calm and confident."
expected = [
    'You are graceful because you are calm and confident.',
    'You are poised because you are calm and confident.'
]
test(parser, simple_find_parallel_sentence7, text, expected=expected)

'''
MORE TEST CASES (4) - from the example astrology dataset.
'''

text = "Comfort is important to you and you hate getting your hands dirty."
test(parser, simple_find_parallel_sentence7, text)

text = "Debate and argument appeal to you."
test(parser, simple_find_parallel_sentence7, text)

text = "Guard against possessiveness, jealousy, and taking the easy way out in your relationships."
test(parser, simple_find_parallel_sentence7, text)

text = "People with Virgo rising tend to be practical, analytical, discriminating, fastidious, careful, exacting, attentive to details, methodical, quiet, unassuming, shy, critical, thoughtful, and somewhat self-centered."
test(parser, simple_find_parallel_sentence7, text)

text = "You aim to please in practical and earthy ways."
test(parser, simple_find_parallel_sentence7, text)


input:
You are graceful and poised because you are calm and confident. 

output:
[]

expected:
['You are graceful because you are calm and confident.', 'You are poised because you are calm and confident.'] 

result: FAILING

                               ROOT                                      
                                |                                         
                                S                                        
  ______________________________|______________________________________   
 |                              VP                                     | 
 |    __________________________|_________                             |  
 |   |            |                      SBAR                          | 
 |   |            |              _________|____                        |  
 |   |            |             |              S                       | 
 |   |            |             |      ________|____                   |  
 |   |            |           