In [1]:
from nltk.parse import corenlp

In [2]:
from nltk.tree import Tree

In [3]:
t = Tree(1, [2, Tree(3, [4]), 5])
print(t)

(1 2 (3 4) 5)


In [4]:
s = Tree.fromstring("(S (NP I) (VP (V saw) (NP him)))")
print(s)

(S (NP I) (VP (V saw) (NP him)))


In [5]:
s.pretty_print()

     S         
  ___|___       
 |       VP    
 |    ___|___   
 NP  V       NP
 |   |       |  
 I  saw     him



In [6]:
output = "(ROOT (S(NP (PRP You))(VP(VP (VBP grasp)(NP (NNS concepts))(ADVP (RB easily)))(CC and)(VP (MD may)(VP (VB become)(ADJP (JJ impatient))(PP (IN with)(NP(NP (DT those))(SBAR(WHNP (WP who))(S(VP (VBP do) (RB n't)(VP (VB learn)(ADVP (RB as) (RB quickly)))))))))))(. .)))"

In [7]:
parsetree = Tree.fromstring(output)
print(parsetree)

(ROOT
  (S
    (NP (PRP You))
    (VP
      (VP (VBP grasp) (NP (NNS concepts)) (ADVP (RB easily)))
      (CC and)
      (VP
        (MD may)
        (VP
          (VB become)
          (ADJP (JJ impatient))
          (PP
            (IN with)
            (NP
              (NP (DT those))
              (SBAR
                (WHNP (WP who))
                (S
                  (VP
                    (VBP do)
                    (RB n't)
                    (VP (VB learn) (ADVP (RB as) (RB quickly)))))))))))
    (. .)))


In [8]:
parsetree.pretty_print()

                                                   ROOT                                               
                                                    |                                                  
                                                    S                                                 
  __________________________________________________|_______________________________________________   
 |                         VP                                                                       | 
 |            _____________|________                                                                |  
 |           |             |        VP                                                              | 
 |           |             |    ____|________                                                       |  
 |           |             |   |             VP                                                     | 
 |           |             |   |     ________|____________           

In [9]:
# access children (which are also trees)
parsetree[0][1][2][1][2].pretty_print()

       PP                                      
  _____|____                                    
 |          NP                                 
 |      ____|________                           
 |     |            SBAR                       
 |     |     ________|_____                     
 |     |    |              S                   
 |     |    |              |                    
 |     |    |              VP                  
 |     |    |     _________|____                
 |     |    |    |   |          VP             
 |     |    |    |   |      ____|___            
 |     NP  WHNP  |   |     |       ADVP        
 |     |    |    |   |     |     ___|______     
 IN    DT   WP  VBP  RB    VB   RB         RB  
 |     |    |    |   |     |    |          |    
with those who   do n't  learn  as      quickly



In [10]:
parsetree[0][1][2][1][2].label()

'PP'

In [11]:
parsetree.productions()

[ROOT -> S,
 S -> NP VP .,
 NP -> PRP,
 PRP -> 'You',
 VP -> VP CC VP,
 VP -> VBP NP ADVP,
 VBP -> 'grasp',
 NP -> NNS,
 NNS -> 'concepts',
 ADVP -> RB,
 RB -> 'easily',
 CC -> 'and',
 VP -> MD VP,
 MD -> 'may',
 VP -> VB ADJP PP,
 VB -> 'become',
 ADJP -> JJ,
 JJ -> 'impatient',
 PP -> IN NP,
 IN -> 'with',
 NP -> NP SBAR,
 NP -> DT,
 DT -> 'those',
 SBAR -> WHNP S,
 WHNP -> WP,
 WP -> 'who',
 S -> VP,
 VP -> VBP RB VP,
 VBP -> 'do',
 RB -> "n't",
 VP -> VB ADVP,
 VB -> 'learn',
 ADVP -> RB RB,
 RB -> 'as',
 RB -> 'quickly',
 . -> '.']

In [12]:
def get_child_labels(t):
    """
    Collect the label of every direct child of a tree node.

    Leaf children (plain tokens such as ``'saw'``) have no ``label()`` method.
    They are reported as ``None`` instead of raising, so the returned list
    always has exactly one entry per child and ``labels.index(x)`` can be used
    directly as an index into ``t``.

    :param t: an nltk.tree.Tree (any iterable of sub-trees and leaf tokens works)
    :returns: list of labels of the children, in order; ``None`` for a leaf child.
    """
    return [
        # Sub-trees expose label(); leaf tokens (str) do not.
        child.label() if hasattr(child, 'label') else None
        for child in t
    ]

In [13]:
def simple_find_parallel_sentence(t):
    """
    Print the parallel sub-sentences that share the subject of clause ``t``.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and "You may become impatient with those who don't learn as quickly."

    The first NP child of ``t`` is taken as the subject. If ``t`` also has a VP
    child (or, failing that, an ADJP child) that itself contains two or more
    children with that same label, each of those children is one sub-sentence.

    :param t: an nltk.tree.Tree for a clause (e.g. the S node under ROOT)
    :returns: None. The subject leaves and the leaves of each sub-sentence are printed.
    """
    top_labels = get_child_labels(t)

    # Without a subject NP there is nothing to distribute over the conjuncts.
    if 'NP' not in top_labels:
        print('Sentence may be invalid.')
        return

    subject_pos = top_labels.index('NP')
    print('NP found.')

    # VP takes precedence over ADJP; only one of the two is ever inspected.
    if 'VP' in top_labels:
        phrase_tag, found_message = 'VP', 'found parallel VP subssentences.'
    elif 'ADJP' in top_labels:
        phrase_tag, found_message = 'ADJP', 'found parallel ADJP sub-sentences.'
    else:
        phrase_tag, found_message = None, None

    pieces = []
    if phrase_tag is not None:
        phrase = t[top_labels.index(phrase_tag)]
        # Two or more same-labelled children means the phrase is a coordination.
        if get_child_labels(phrase).count(phrase_tag) >= 2:
            print(found_message)
            pieces = [kid.leaves() for kid in phrase if kid.label() == phrase_tag]

    print('new sentences are :')
    for piece in pieces:
        print(t[subject_pos].leaves(), piece)

    # todo: return n new trees representing the n new sentences or return n new string sentences.
    

In [14]:
simple_find_parallel_sentence(parsetree[0])

NP found.
found parallel VP subssentences.
new sentences are :
['You'] ['grasp', 'concepts', 'easily']
['You'] ['may', 'become', 'impatient', 'with', 'those', 'who', 'do', "n't", 'learn', 'as', 'quickly']


In [15]:
# Remember to start Stanford CoreNLP java server separately.
parser = corenlp.CoreNLPParser(url='http://localhost:9000')

In [16]:
parsed = parser.raw_parse('The quick brown fox jumps over the lazy dog.')

In [17]:
type(parsed)

list_iterator

In [18]:
# raw_parse() returned a one-shot iterator; materialise it so the parses can be indexed.
collected = list(parsed)

In [19]:
print(collected)

[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('VP', [Tree('VBZ', ['jumps']), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]


In [20]:
collected[0].pretty_print()

                     ROOT                          
                      |                             
                      S                            
       _______________|__________________________   
      |                         VP               | 
      |                _________|___             |  
      |               |             PP           | 
      |               |     ________|___         |  
      NP              |    |            NP       | 
  ____|__________     |    |     _______|____    |  
 DT   JJ    JJ   NN  VBZ   IN   DT      JJ   NN  . 
 |    |     |    |    |    |    |       |    |   |  
The quick brown fox jumps over the     lazy dog  . 



In [21]:
# Parse a sentence whose predicate is a pair of coordinated adjectives.
parsed2 = parser.raw_parse('You are hardworking and intelligent.')
collected = list(parsed2)  # drain the iterator so the first parse can be indexed
parsetree2 = collected[0]
parsetree2.pretty_print()

            ROOT                            
             |                               
             S                              
  ___________|____________________________   
 |                   VP                   | 
 |    _______________|___                 |  
 |   |                  ADJP              | 
 |   |        ___________|________        |  
 NP  |      ADJP         |       ADJP     | 
 |   |       |           |        |       |  
PRP VBP     VBG          CC       JJ      . 
 |   |       |           |        |       |  
You are hardworking     and  intelligent  . 



In [22]:
simple_find_parallel_sentence(parsetree2[0])

NP found.
new sentences are :


So 'You are hardworking and intelligent.' has a different syntactic structure from 'You grasp concepts easily and may become impatient with those who don't learn as quickly.'. This means I need to rewrite my simple_find_parallel_sentence() function.

In [23]:
def simple_find_parallel_sentence2(t):
    """
    Print the parallel sub-sentences that share the subject of clause ``t``.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and "You may become impatient with those who don't learn as quickly."

    Structures recognised (the first NP child of ``t`` is the subject):

    * NP + VP, where the VP has two or more VP children;
    * NP + VP, where the VP has exactly one ADJP child and that ADJP has two or
      more ADJP children;
    * NP + ADJP (no VP), where the ADJP has two or more ADJP children.

    :param t: an nltk.tree.Tree for a clause (e.g. the S node under ROOT)
    :returns: None. The subject leaves and the leaves of each sub-sentence are printed.
    """
    top_labels = get_child_labels(t)

    if 'NP' not in top_labels:
        print('Sentence may be invalid.')
        return

    subject_pos = top_labels.index('NP')
    print('NP found.')
    pieces = []

    if 'VP' in top_labels:
        print('NP and VP found on same level.')
        verb_phrase = t[top_labels.index('VP')]
        vp_labels = get_child_labels(verb_phrase)

        if vp_labels.count('VP') >= 2:
            # e.g. (VP (VP grasp concepts easily) (CC and) (VP may become impatient ...))
            print('found parallel VP.')
            pieces = [kid.leaves() for kid in verb_phrase if kid.label() == 'VP']
        elif vp_labels.count('ADJP') == 1:
            print('found ADJP under VP.')
            adj_phrase = verb_phrase[vp_labels.index('ADJP')]
            if get_child_labels(adj_phrase).count('ADJP') >= 2:
                # e.g. "You are hardworking and intelligent.":
                #   (S (NP (PRP You))
                #      (VP (VBP are)
                #          (ADJP (ADJP (VBG hardworking)) (CC and) (ADJP (JJ intelligent))))
                #      (. .))
                print('found parallel ADJP')
                pieces = [kid.leaves() for kid in adj_phrase if kid.label() == 'ADJP']

    elif 'ADJP' in top_labels:
        adj_phrase = t[top_labels.index('ADJP')]
        if get_child_labels(adj_phrase).count('ADJP') >= 2:
            print('found parallel ADJP.')
            pieces = [kid.leaves() for kid in adj_phrase if kid.label() == 'ADJP']

    print('new sentences are :')
    for piece in pieces:
        print(t[subject_pos].leaves(), piece)

    # todo: return n new trees representing the n new sentences or return n new string sentences.
    # todo: Rewrite code to fit more general cases. Right now, the logic is very 'hard-coded' and 'brute-force'.
    

In [24]:
simple_find_parallel_sentence2(parsetree2[0])

NP found.
NP and VP found on same level.
found ADJP under VP.
found parallel ADJP
new sentences are :
['You'] ['hardworking']
['You'] ['intelligent']


Try another potentially erroneous parse tree...

In [25]:
# Same sentence with a negation, to see where the parser attaches 'not'.
parsed3 = parser.raw_parse('You are not hardworking and intelligent.')
collected = list(parsed3)  # drain the iterator so the first parse can be indexed
parsetree3 = collected[0]
parsetree3.pretty_print()

                     ROOT                       
                      |                          
                      S                         
  ____________________|_______________________   
 |       VP                                   | 
 |    ___|____________                        |  
 |   |               ADJP                     | 
 |   |        ________|_______________        |  
 NP  |      ADJP              |      ADJP     | 
 |   |    ___|________        |       |       |  
PRP VBP  RB          VBG      CC      JJ      . 
 |   |   |            |       |       |       |  
You are not      hardworking and intelligent  . 



In [26]:
simple_find_parallel_sentence2(parsetree3[0])

NP found.
NP and VP found on same level.
found ADJP under VP.
found parallel ADJP
new sentences are :
['You'] ['not', 'hardworking']
['You'] ['intelligent']


The above sentence illustrates simple sentence ambiguity. The intended meaning of the sentence is 'you are not hardworking' and 'you are not intelligent', but the parser attaches 'not' only to 'hardworking'. This is a limitation of the parser: my sentence-splitting code depends on the correctness of the parse. For now though, simple non-ambiguous cases should work with my sentence splitter.

In [27]:
def get_parsetree(parser, sent):
    """
    Parse a sentence and hand back the first parse tree the parser yields.

    :param parser: an nltk.parse.corenlp.CoreNLPParser
    :param sent: the raw sentence text, passed to ``parser.raw_parse``
    :returns: nltk.tree.Tree (the first item produced by ``raw_parse``)
    :raises IndexError: if the parser yields no parse at all
    """
    # raw_parse() gives an iterator; materialise it so the first parse can be indexed.
    every_parse = list(parser.raw_parse(sent))
    return every_parse[0]

In [28]:
parsetree4 = get_parsetree(parser, 'Bobby is happy and carefree.')
parsetree4.pretty_print()

           ROOT                      
            |                         
            S                        
   _________|______________________   
  |              VP                | 
  |     _________|___              |  
  NP   |            ADJP           | 
  |    |     ________|______       |  
 NNP  VBZ   JJ       CC     JJ     . 
  |    |    |        |      |      |  
Bobby  is happy     and  carefree  . 



In [29]:
simple_find_parallel_sentence2(parsetree4[0])

NP found.
NP and VP found on same level.
found ADJP under VP.
new sentences are :


JJ pairs are not accounted for. Need to modify simple_find_parallel_sentence2().

In [30]:
def simple_find_parallel_sentence3(t):
    """
    Print the parallel sub-sentences that share the subject of clause ``t``.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and "You may become impatient with those who don't learn as quickly."

    Structures recognised (the first NP child of ``t`` is the subject):

    * NP + VP, where the VP has two or more VP children;
    * NP + VP, where the VP has exactly one ADJP child and that ADJP has two or
      more children labelled ADJP or JJ;
    * NP + ADJP (no VP), where the ADJP has two or more ADJP children.

    :param t: an nltk.tree.Tree for a clause (e.g. the S node under ROOT)
    :returns: None. The subject leaves and the leaves of each sub-sentence are printed.
    """
    top_labels = get_child_labels(t)

    if 'NP' not in top_labels:
        print('Sentence may be invalid.')
        return

    subject_pos = top_labels.index('NP')
    print('NP found.')
    pieces = []

    if 'VP' in top_labels:
        print('NP and VP found on same level.')
        verb_phrase = t[top_labels.index('VP')]
        vp_labels = get_child_labels(verb_phrase)

        if vp_labels.count('VP') >= 2:
            # e.g. (VP (VP grasp concepts easily) (CC and) (VP may become impatient ...))
            print('found parallel VP.')
            pieces = [kid.leaves() for kid in verb_phrase if kid.label() == 'VP']
        elif vp_labels.count('ADJP') == 1:
            print('found ADJP under VP.')
            adj_phrase = verb_phrase[vp_labels.index('ADJP')]
            adj_labels = get_child_labels(adj_phrase)
            if adj_labels.count('ADJP') + adj_labels.count('JJ') >= 2:
                # Conjuncts may be full phrases or bare adjectives, e.g.
                #   (ADJP (ADJP (VBG hardworking)) (CC and) (ADJP (JJ intelligent)))
                #   (ADJP (JJ happy) (CC and) (JJ carefree))
                print('found parallel ADJP')
                pieces = [
                    kid.leaves()
                    for kid in adj_phrase
                    if kid.label() in ('ADJP', 'JJ')
                ]

    elif 'ADJP' in top_labels:
        adj_phrase = t[top_labels.index('ADJP')]
        if get_child_labels(adj_phrase).count('ADJP') >= 2:
            print('found parallel ADJP.')
            pieces = [kid.leaves() for kid in adj_phrase if kid.label() == 'ADJP']

    print('new sentences are :')
    for piece in pieces:
        print(t[subject_pos].leaves(), piece)

    # todo: return n new trees representing the n new sentences or return n new string sentences.
    # todo: Rewrite code to fit more general cases. Right now, the logic is very 'hard-coded' and 'brute-force'.
    

In [31]:
simple_find_parallel_sentence3(parsetree4[0])

NP found.
NP and VP found on same level.
found ADJP under VP.
found parallel ADJP
new sentences are :
['Bobby'] ['happy']
['Bobby'] ['carefree']


parsetree4 works now.

In [32]:
parsetree5 = get_parsetree(parser, 'Bobby is unusually happy and carefree.')
parsetree5.pretty_print()
simple_find_parallel_sentence3(parsetree5[0])

                     ROOT                 
                      |                    
                      S                   
   ___________________|_________________   
  |           VP                        | 
  |     ______|_______                  |  
  NP   |             ADJP               | 
  |    |       _______|__________       |  
 NNP  VBZ     RB      JJ   CC    JJ     . 
  |    |      |       |    |     |      |  
Bobby  is unusually happy and carefree  . 

NP found.
NP and VP found on same level.
found ADJP under VP.
found parallel ADJP
new sentences are :
['Bobby'] ['happy']
['Bobby'] ['carefree']


In [33]:
parsetree6 = get_parsetree(parser, "Bobby is carefree and one of the most enigmatic persons I've known.")
parsetree6.pretty_print()
simple_find_parallel_sentence3(parsetree6[0])

                                      ROOT                                                  
                                       |                                                     
                                       S                                                    
        _______________________________|__________________________________________________   
       |                |                                S                                | 
       |                |         _______________________|______________________          |  
       |                |      NP-TMP                                   |       |         | 
       |                |    ____|_____                                 |       |         |  
       S                |   |          PP                               |       |         | 
   ____|___             |   |     _____|____                            |       |         |  
  |        VP           |   |    |          NP                   

The parser already detects 2 sentences - let's modify simple_find_parallel_sentence3() to cover this case.

In [34]:
def simple_find_parallel_sentence4(t):
    """
    From a sentence, finds two parallel sub-sentences describing the same subject.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and "You may become impatient with those who don't learn as quickly."

    Structures recognised, checked in this order:

    * two or more S children: each S is printed as its own sentence (no shared subject);
    * NP + VP, where the VP has two or more VP children;
    * NP + VP, where the VP has exactly one ADJP child and that ADJP has two or
      more children labelled ADJP or JJ;
    * NP + ADJP (no VP), where the ADJP has two or more children labelled ADJP or JJ.

    :param t: an nltk.tree.Tree for a clause (e.g. the S node under ROOT)
    :returns: None. The results are printed, not returned (see the todo below).
    """
    # An adjective conjunct may be a full phrase (ADJP) or a bare adjective (JJ).
    # Both ADJP branches below use the same set so they recognise the same coordinations.
    adjective_tags = ('ADJP', 'JJ')

    subsentences = []
    labels = get_child_labels(t)

    if labels.count('S') >= 2:
        print(">=2 'S' found on same level.")
        subsentences = [child.leaves() for child in t if child.label() == 'S']
        print('new sentences are :')
        for sent in subsentences:
            print(sent)
        return

    if 'NP' not in labels:
        print('Sentence may be invalid.')
        return

    np_label = labels.index('NP')
    print('NP found.')

    if 'VP' in labels:
        print('NP and VP found on same level.')
        vp = t[labels.index('VP')]
        vp_labels = get_child_labels(vp)
        if vp_labels.count('VP') >= 2:
            print('found parallel VP.')
            subsentences = [child.leaves() for child in vp if child.label() == 'VP']
        elif vp_labels.count('ADJP') == 1:
            print('found ADJP under VP.')
            adjp = vp[vp_labels.index('ADJP')]
            adjp_labels = get_child_labels(adjp)
            if sum(adjp_labels.count(tag) for tag in adjective_tags) >= 2:
                # e.g. "You are hardworking and intelligent.":
                #   (S (NP (PRP You))
                #      (VP (VBP are)
                #          (ADJP (ADJP (VBG hardworking)) (CC and) (ADJP (JJ intelligent))))
                #      (. .))
                print('found parallel ADJP')
                subsentences = [
                    child.leaves() for child in adjp if child.label() in adjective_tags
                ]

    elif 'ADJP' in labels:
        adjp = t[labels.index('ADJP')]
        adjp_labels = get_child_labels(adjp)
        # Previously only ADJP conjuncts were counted here, so a bare JJ pair
        # directly beside the NP was missed even though the VP branch accepted it.
        if sum(adjp_labels.count(tag) for tag in adjective_tags) >= 2:
            print('found parallel ADJP.')
            subsentences = [
                child.leaves() for child in adjp if child.label() in adjective_tags
            ]

    print('new sentences are :')
    for sent in subsentences:
        print(t[np_label].leaves(), sent)

    # todo: return n new trees representing the n new sentences or return n new string sentences.
    # todo: Rewrite code to fit more general cases. Right now, the logic is very 'hard-coded' and 'brute-force'.

In [35]:
parsetree7 = get_parsetree(parser, "Bobby is carefree and he is one of the most enigmatic persons I've known.")
parsetree7.pretty_print()
simple_find_parallel_sentence4(parsetree7[0])

                                       ROOT                                                  
                                        |                                                     
                                        S                                                    
        ________________________________|__________________________________________________   
       |                |       S                                                          | 
       |                |    ___|___                                                       |  
       |                |   |       VP                                                     | 
       |                |   |    ___|________                                              |  
       |                |   |   |            NP                                            | 
       |                |   |   |    ________|___________                                  |  
       |                |   |   |   |                  

The limitation of this particular case is that the parse tree does not identify the common subject, although a human reader will be able to identify it upon reading the sentence.

Now that we have modified the code to cover a few common cases, let's test the function on a set of new sentences.

In [36]:
parsetreex = get_parsetree(parser, "Bobby is an amazing father and close friend.")
parsetreex.pretty_print()
simple_find_parallel_sentence4(parsetreex[0])

parsetreex = get_parsetree(parser, "If you have aries in rising, you tend to be loud and superficial.")
parsetreex.pretty_print()
simple_find_parallel_sentence4(parsetreex[0])

                       ROOT                          
                        |                             
                        S                            
   _____________________|__________________________   
  |        VP                                      | 
  |     ___|____________                           |  
  |    |                NP                         | 
  |    |          ______|_______________           |  
  NP   |         NP           |         NP         | 
  |    |    _____|______      |     ____|____      |  
 NNP  VBZ  DT    JJ     NN    CC   JJ        NN    . 
  |    |   |     |      |     |    |         |     |  
Bobby  is  an amazing father and close     friend  . 

NP found.
NP and VP found on same level.
new sentences are :
                                          ROOT                                           
                                           |                                              
                                           S     

Clearly, there are a lot more structures that the simple sentence splitter does not cover. Syntax is complex indeed. 

In [37]:
parsetreex = get_parsetree(parser, "You are quick to grasp new concepts and equally quick to lose interest in an idea or project once your curiosity has been satisfied.")
parsetreex.pretty_print()
simple_find_parallel_sentence4(parsetreex[0])

                                                                          ROOT                                                                              
                                                                           |                                                                                 
                                                                           S                                                                                
  _________________________________________________________________________|______________________________________________________________________________   
 |                                     VP                                                                                                                 | 
 |    _________________________________|___________________________________                                                                               |  
 |   |                                                 

In [38]:
text = "If you have Mercury in Gemini, you have an extremely adaptable, energetic, active, alert, curious, and versatile mind."
parsetreex = get_parsetree(parser, text)
parsetreex.pretty_print()
simple_find_parallel_sentence4(parsetreex[0])

                                                                             ROOT                                                                       
                                                                              |                                                                          
                                                                              S                                                                         
          ____________________________________________________________________|_______________________________________________________________________   
        SBAR                             |   |                                                     |                                                  | 
  _______|______                         |   |                                                     |                                                  |  
 |              S                        |   |                                 

In [39]:
parsetreex = get_parsetree(parser, "You are hardworking and intelligent")
parsetreex.pretty_print()
simple_find_parallel_sentence4(parsetreex[0])

            ROOT                        
             |                           
             S                          
  ___________|_______                    
 |                   VP                 
 |    _______________|___                
 |   |                  ADJP            
 |   |        ___________|________       
 NP  |      ADJP         |       ADJP   
 |   |       |           |        |      
PRP VBP     VBG          CC       JJ    
 |   |       |           |        |      
You are hardworking     and  intelligent

NP found.
NP and VP found on same level.
found ADJP under VP.
found parallel ADJP
new sentences are :
['You'] ['hardworking']
['You'] ['intelligent']


In [40]:
def simple_find_parallel_sentence5(t):
    """
    Looks for the first set of parallel parts that share one subject and prints them.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and
        "You may become impatient with those who don't learn as quickly."

    Only the direct children of `t` (and at most two levels below its first VP) are inspected:

      * two or more 'S' children: each clause is printed on its own line;
      * an 'NP' and a 'VP' child: the first VP is searched, in this order, for
          - two or more 'VP' children,
          - exactly one 'ADJP' child holding two or more 'ADJP'/'JJ' children,
          - exactly one 'NP' child holding two or more 'NP'/'NN' children;
      * an 'NP' and an 'ADJP' child (no 'VP'): the first ADJP is searched for
        two or more 'ADJP' children.

    Each hit is printed as the leaves of the first NP followed by the leaves of the part.

    :param t: an nltk.tree.Tree
    :returns: None. Everything is reported through print().
    """
    top_labels = get_child_labels(t)

    # Shape 1: several clauses side by side.
    if top_labels.count('S') >= 2:
        print(">=2 'S' found on same level.")
        clauses = [node.leaves() for node in t if node.label() == 'S']
        print('new sentences are :')
        for clause in clauses:
            print(clause)
        return

    # Without a subject NP none of the remaining shapes apply.
    if 'NP' not in top_labels:
        print('Sentence may be too complex.')
        return

    subject_pos = top_labels.index('NP')
    print('NP found.')
    parts = []

    if 'VP' in top_labels:
        print('NP and VP found on same level.')
        verb_phrase = t[top_labels.index('VP')]
        vp_labels = get_child_labels(verb_phrase)

        if vp_labels.count('VP') >= 2:
            print('found parallel VP.')
            parts.extend(node.leaves() for node in verb_phrase if node.label() == 'VP')

        elif vp_labels.count('ADJP') == 1:
            print('found ADJP under VP.')
            adj_phrase = verb_phrase[vp_labels.index('ADJP')]
            adj_labels = get_child_labels(adj_phrase)
            if adj_labels.count('ADJP') + adj_labels.count('JJ') >= 2:
                # e.g. "You are hardworking and intelligent ."
                #   (S (NP (PRP You))
                #      (VP (VBP are)
                #          (ADJP (ADJP (VBG hardworking)) (CC and) (ADJP (JJ intelligent))))
                #      (. .))
                print('found parallel ADJP')
                parts.extend(node.leaves() for node in adj_phrase
                             if node.label() in ('ADJP', 'JJ'))

        elif vp_labels.count('NP') == 1:
            print('found NP under VP.')
            noun_phrase = verb_phrase[vp_labels.index('NP')]
            noun_labels = get_child_labels(noun_phrase)
            if noun_labels.count('NP') + noun_labels.count('NN') >= 2:
                print('found parallel NP')
                parts.extend(node.leaves() for node in noun_phrase
                             if node.label() in ('NP', 'NN'))

    elif 'ADJP' in top_labels:
        adj_phrase = t[top_labels.index('ADJP')]
        if get_child_labels(adj_phrase).count('ADJP') >= 2:
            print('found parallel ADJP.')
            parts.extend(node.leaves() for node in adj_phrase if node.label() == 'ADJP')

    # The header is printed even when nothing parallel was found.
    print('new sentences are :')
    for part in parts:
        print(t[subject_pos].leaves(), part)

    # todo: return n new trees representing the n new sentences or return n new string sentences.
    # todo: Rewrite code to fit more general cases. Right now, the logic is very 'hard-coded' and 'brute-force'.

In [41]:
# Demo: two coordinated noun phrases after the verb.
# Draw the parse, then run version 5 of the splitter on the first child of the parse tree.
parsetreex = get_parsetree(parser, "Bobby is a good father and great friend.")
parsetreex.pretty_print()
simple_find_parallel_sentence5(parsetreex[0])

                    ROOT                          
                     |                             
                     S                            
   __________________|__________________________   
  |        VP                                   | 
  |     ___|_________                           |  
  |    |             NP                         | 
  |    |        _____|_______________           |  
  NP   |       NP          |         NP         | 
  |    |    ___|_____      |     ____|____      |  
 NNP  VBZ  DT  JJ    NN    CC   JJ        NN    . 
  |    |   |   |     |     |    |         |     |  
Bobby  is  a  good father and great     friend  . 

NP found.
NP and VP found on same level.
found NP under VP.
found parallel NP
new sentences are :
['Bobby'] ['a', 'good', 'father']
['Bobby'] ['great', 'friend']


In [42]:
# Same sentence with a negation ("is not").
# Version 5 prints only the subject and each noun phrase, so the "is not" is missing
# from the printed result below.
parsetreex = get_parsetree(parser, "Bobby is not a good father and great friend.")
parsetreex.pretty_print()
simple_find_parallel_sentence5(parsetreex[0])

                  ROOT                                
                   |                                   
                   S                                  
   ________________|________________________________   
  |                VP                               | 
  |     ___________|_____                           |  
  |    |   |             NP                         | 
  |    |   |        _____|_______________           |  
  NP   |   |       NP          |         NP         | 
  |    |   |    ___|_____      |     ____|____      |  
 NNP  VBZ  RB  DT  JJ    NN    CC   JJ        NN    . 
  |    |   |   |   |     |     |    |         |     |  
Bobby  is not  a  good father and great     friend  . 

NP found.
NP and VP found on same level.
found NP under VP.
found parallel NP
new sentences are :
['Bobby'] ['a', 'good', 'father']
['Bobby'] ['great', 'friend']


The case above fails because the verb and its negation (VBZ + RB, i.e. "is not") are not taken into account when the new sentences are constructed, so the negation is lost.

In [43]:
def simple_find_parallel_sentence6(t):
    """
    Looks for the first set of parallel parts that share one subject and prints them,
    together with the words of the verb phrase that qualify every part.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and
        "You may become impatient with those who don't learn as quickly."

    Only the direct children of `t` (and at most two levels below its first VP) are inspected:

      * two or more 'S' children: each clause is printed on its own line;
      * an 'NP' and a 'VP' child: every child of the first VP whose label is not
        'VP', 'NP' or 'ADJP' is collected as a qualifier (e.g. VBZ "is", RB "not";
        note that a 'CC' child is collected too), then the VP is searched, in this order, for
          - two or more 'VP' children,
          - exactly one 'ADJP' child holding two or more 'ADJP'/'JJ' children,
          - exactly one 'NP' child holding two or more 'NP'/'NN' children;
      * an 'NP' and an 'ADJP' child (no 'VP'): the first ADJP is searched for
        two or more 'ADJP' children.

    Each hit is printed as: leaves of the first NP, list of qualifiers, leaves of the part.

    :param t: an nltk.tree.Tree
    :returns: None. Everything is reported through print().
    """
    top_labels = get_child_labels(t)

    # Shape 1: several clauses side by side.
    if top_labels.count('S') >= 2:
        print(">=2 'S' found on same level.")
        clauses = [node.leaves() for node in t if node.label() == 'S']
        print('new sentences are :')
        for clause in clauses:
            print(clause)
        return

    # Without a subject NP none of the remaining shapes apply.
    if 'NP' not in top_labels:
        print('Sentence structure not covered by function.')
        return

    subject_pos = top_labels.index('NP')
    print('NP found.')
    parts = []
    modifiers = []  # phrases that qualify every part (verb, negation, ...)

    if 'VP' in top_labels:
        print('NP and VP found on same level.')
        verb_phrase = t[top_labels.index('VP')]
        vp_labels = get_child_labels(verb_phrase)

        # Anything directly under the VP that is not itself a VP, NP or ADJP
        # is assumed to qualify all of the parallel parts.
        modifiers = [node.leaves() for node in verb_phrase
                     if node.label() not in ('VP', 'NP', 'ADJP')]

        if vp_labels.count('VP') >= 2:
            print('found parallel VP.')
            parts.extend(node.leaves() for node in verb_phrase if node.label() == 'VP')

        elif vp_labels.count('ADJP') == 1:
            print('found ADJP under VP.')
            adj_phrase = verb_phrase[vp_labels.index('ADJP')]
            adj_labels = get_child_labels(adj_phrase)
            if adj_labels.count('ADJP') + adj_labels.count('JJ') >= 2:
                # e.g. "You are hardworking and intelligent ."
                #   (S (NP (PRP You))
                #      (VP (VBP are)
                #          (ADJP (ADJP (VBG hardworking)) (CC and) (ADJP (JJ intelligent))))
                #      (. .))
                print('found parallel ADJP')
                parts.extend(node.leaves() for node in adj_phrase
                             if node.label() in ('ADJP', 'JJ'))

        elif vp_labels.count('NP') == 1:
            print('found NP under VP.')
            noun_phrase = verb_phrase[vp_labels.index('NP')]
            noun_labels = get_child_labels(noun_phrase)
            if noun_labels.count('NP') + noun_labels.count('NN') >= 2:
                print('found parallel NP')
                parts.extend(node.leaves() for node in noun_phrase
                             if node.label() in ('NP', 'NN'))

    elif 'ADJP' in top_labels:
        adj_phrase = t[top_labels.index('ADJP')]
        if get_child_labels(adj_phrase).count('ADJP') >= 2:
            print('found parallel ADJP.')
            parts.extend(node.leaves() for node in adj_phrase if node.label() == 'ADJP')

    # The header is printed even when nothing parallel was found.
    print('new sentences are :')
    for part in parts:
        print(t[subject_pos].leaves(), modifiers, part)
    print()

    # todo: return n new trees representing the n new sentences or return n new string sentences.
    # todo: Rewrite code to fit more general cases. Right now, the logic is very 'hard-coded' and 'brute-force'.
    # todo: consider the possibility of a recursive algo.

In [44]:
# Re-run the affirmative sentence with version 6, which also prints the
# qualifiers collected from the verb phrase (here "is").
parsetreex = get_parsetree(parser, "Bobby is a good father and great friend.")
parsetreex.pretty_print()
simple_find_parallel_sentence6(parsetreex[0])

                    ROOT                          
                     |                             
                     S                            
   __________________|__________________________   
  |        VP                                   | 
  |     ___|_________                           |  
  |    |             NP                         | 
  |    |        _____|_______________           |  
  NP   |       NP          |         NP         | 
  |    |    ___|_____      |     ____|____      |  
 NNP  VBZ  DT  JJ    NN    CC   JJ        NN    . 
  |    |   |   |     |     |    |         |     |  
Bobby  is  a  good father and great     friend  . 

NP found.
NP and VP found on same level.
found NP under VP.
found parallel NP
new sentences are :
['Bobby'] [['is']] ['a', 'good', 'father']
['Bobby'] [['is']] ['great', 'friend']



In [45]:
# Re-run the negated sentence with version 6: "is" and "not" now show up
# as qualifiers next to each noun phrase in the printed result.
parsetreex = get_parsetree(parser, "Bobby is not a good father and great friend.")
parsetreex.pretty_print()
simple_find_parallel_sentence6(parsetreex[0])

                  ROOT                                
                   |                                   
                   S                                  
   ________________|________________________________   
  |                VP                               | 
  |     ___________|_____                           |  
  |    |   |             NP                         | 
  |    |   |        _____|_______________           |  
  NP   |   |       NP          |         NP         | 
  |    |   |    ___|_____      |     ____|____      |  
 NNP  VBZ  RB  DT  JJ    NN    CC   JJ        NN    . 
  |    |   |   |   |     |     |    |         |     |  
Bobby  is not  a  good father and great     friend  . 

NP found.
NP and VP found on same level.
found NP under VP.
found parallel NP
new sentences are :
['Bobby'] [['is'], ['not']] ['a', 'good', 'father']
['Bobby'] [['is'], ['not']] ['great', 'friend']



In [46]:
'''
TEST CASES - passing
'''
# Nothing is asserted in this cell: each call prints what it found and the
# printed output is checked by eye.

# parallel NN: two coordinated noun phrases after the verb
parsetreex = get_parsetree(parser, "Bobby is a good father and great friend.")
simple_find_parallel_sentence6(parsetreex[0])

# parallel "negative" NN: same sentence, verb negated with "not"
parsetreex = get_parsetree(parser, "Bobby is not a good father and great friend.")
simple_find_parallel_sentence6(parsetreex[0])

# parallel ADJP: both adjectives carry their own adverb
parsetreex = get_parsetree(parser, "Bobby is very handsome and super smart")
simple_find_parallel_sentence6(parsetreex[0])

# parallel ADJP AND JJ: only the first adjective carries an adverb
parsetreex = get_parsetree(parser, "Bobby is very handsome and smart")
simple_find_parallel_sentence6(parsetreex[0])

# parallel VP (in the output recorded below, this sentence is reported as
# "found parallel ADJP" under the VP rather than as parallel VPs)
parsetreex = get_parsetree(parser, "You are quick to grasp new concepts and equally quick to lose interest in an idea or project once your curiosity has been satisfied.")
simple_find_parallel_sentence6(parsetreex[0])



NP found.
NP and VP found on same level.
found NP under VP.
found parallel NP
new sentences are :
['Bobby'] [['is']] ['a', 'good', 'father']
['Bobby'] [['is']] ['great', 'friend']

NP found.
NP and VP found on same level.
found NP under VP.
found parallel NP
new sentences are :
['Bobby'] [['is'], ['not']] ['a', 'good', 'father']
['Bobby'] [['is'], ['not']] ['great', 'friend']

NP found.
NP and VP found on same level.
found ADJP under VP.
found parallel ADJP
new sentences are :
['Bobby'] [['is']] ['very', 'handsome']
['Bobby'] [['is']] ['super', 'smart']

NP found.
NP and VP found on same level.
found ADJP under VP.
found parallel ADJP
new sentences are :
['Bobby'] [['is']] ['handsome']
['Bobby'] [['is']] ['smart']

NP found.
NP and VP found on same level.
found ADJP under VP.
found parallel ADJP
new sentences are :
['You'] [['are']] ['quick', 'to', 'grasp', 'new', 'concepts']
['You'] [['are']] ['equally', 'quick', 'to', 'lose', 'interest', 'in', 'an', 'idea', 'or', 'project', 'once', '

Open question: is the adverb "equally" important, or can we drop it without much consequence for later stages?

In [47]:
'''
TEST CASES - failing
'''
# Sentences that start with a conditional clause ("If ..."), which version 6
# does not handle. Nothing is asserted; the printed output is checked by eye.

# conditional and multiple adjectives
# (the output recorded below stops at "found NP under VP." and lists no new sentences)
text = "If you have Mercury in Gemini, you have an extremely adaptable, energetic, active, alert, curious, and versatile mind."
parsetreex = get_parsetree(parser, text)
simple_find_parallel_sentence6(parsetreex[0])

# conditional and 2 adjectives; the parse tree is drawn as well for inspection
text = "If you have aries in rising, you tend to be loud and superficial."
parsetreex = get_parsetree(parser, text)
parsetreex.pretty_print()
simple_find_parallel_sentence6(parsetreex[0])

NP found.
NP and VP found on same level.
found NP under VP.
new sentences are :

                                          ROOT                                           
                                           |                                              
                                           S                                             
          _________________________________|___________________________________________   
        SBAR                           |   |             |                             | 
  _______|_____                        |   |             |                             |  
 |             S                       |   |             VP                            | 
 |    _________|____                   |   |     ________|___                          |  
 |   |              VP                 |   |    |            S                         | 
 |   |    __________|___               |   |    |            |                         |  
 |   |   |    

In [48]:
def simple_find_parallel_sentence7(t):
    """
    Looks for the first set of parallel parts that share one subject and prints them,
    together with any leading condition and the words of the verb phrase that qualify every part.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and
        "You may become impatient with those who don't learn as quickly."

    Only the direct children of `t` (and at most two levels below its first VP) are inspected:

      * every 'SBAR' child is recorded as a precondition (e.g. "if you are x");
      * two or more 'S' children: each clause is printed after the preconditions;
      * an 'NP' and a 'VP' child: every child of the first VP whose label is not
        'VP', 'NP' or 'ADJP' is collected as a qualifier (e.g. VBZ "is", RB "not";
        note that a 'CC' child is collected too), then the VP is searched, in this order, for
          - two or more 'VP' children,
          - exactly one 'ADJP' child holding two or more 'ADJP'/'JJ' children,
          - exactly one 'NP' child holding two or more 'NP'/'NN' children;
      * an 'NP' and an 'ADJP' child (no 'VP'): the first ADJP is searched for
        two or more 'ADJP' children.

    Each hit is printed as: preconditions, leaves of the first NP, qualifiers, leaves of the part.

    :param t: an nltk.tree.Tree
    :returns: None. Everything is reported through print().
    """
    top_labels = get_child_labels(t)

    # Conditions ("if ...") sit next to the main clause as SBAR nodes.
    conditions = []
    for node in t:
        if node.label() == 'SBAR':
            print('precondition found.')
            conditions.append(node.leaves())

    # Shape 1: several clauses side by side.
    if top_labels.count('S') >= 2:
        print(">=2 'S' found on same level.")
        clauses = [node.leaves() for node in t if node.label() == 'S']
        print('new sentences are :')
        for clause in clauses:
            print(conditions, clause)
        return

    # Without a subject NP none of the remaining shapes apply.
    if 'NP' not in top_labels:
        print('Sentence structure not covered by function.')
        return

    subject_pos = top_labels.index('NP')
    print('NP found.')
    parts = []
    modifiers = []  # phrases that qualify every part (verb, negation, ...)

    if 'VP' in top_labels:
        print('NP and VP found on same level.')
        verb_phrase = t[top_labels.index('VP')]
        vp_labels = get_child_labels(verb_phrase)

        # Anything directly under the VP that is not itself a VP, NP or ADJP
        # is assumed to qualify all of the parallel parts.
        modifiers = [node.leaves() for node in verb_phrase
                     if node.label() not in ('VP', 'NP', 'ADJP')]

        if vp_labels.count('VP') >= 2:
            print('found parallel VP.')
            parts.extend(node.leaves() for node in verb_phrase if node.label() == 'VP')

        elif vp_labels.count('ADJP') == 1:
            print('found ADJP under VP.')
            adj_phrase = verb_phrase[vp_labels.index('ADJP')]
            adj_labels = get_child_labels(adj_phrase)
            if adj_labels.count('ADJP') + adj_labels.count('JJ') >= 2:
                # e.g. "You are hardworking and intelligent ."
                #   (S (NP (PRP You))
                #      (VP (VBP are)
                #          (ADJP (ADJP (VBG hardworking)) (CC and) (ADJP (JJ intelligent))))
                #      (. .))
                print('found parallel ADJP')
                parts.extend(node.leaves() for node in adj_phrase
                             if node.label() in ('ADJP', 'JJ'))

        elif vp_labels.count('NP') == 1:
            print('found NP under VP.')
            noun_phrase = verb_phrase[vp_labels.index('NP')]
            noun_labels = get_child_labels(noun_phrase)
            if noun_labels.count('NP') + noun_labels.count('NN') >= 2:
                print('found parallel NP')
                parts.extend(node.leaves() for node in noun_phrase
                             if node.label() in ('NP', 'NN'))

    elif 'ADJP' in top_labels:
        adj_phrase = t[top_labels.index('ADJP')]
        if get_child_labels(adj_phrase).count('ADJP') >= 2:
            print('found parallel ADJP.')
            parts.extend(node.leaves() for node in adj_phrase if node.label() == 'ADJP')

    # The header is printed even when nothing parallel was found.
    print('new sentences are :')
    for part in parts:
        print(conditions, t[subject_pos].leaves(), modifiers, part)
    print()

    # todo: return n new trees representing the n new sentences or return n new string sentences.
    # todo: Rewrite code to fit more general cases. Right now, the logic is very 'hard-coded'.
    # todo: consider the possibility of a recursive algo.

In [49]:
'''
TEST CASES
'''

# Each sentence is parsed, drawn and handed to version 7 of the splitter.
for text in (
    # conditional and 2 adjectives.
    "If you have aries in rising, you are loud and superficial.",
    # conditional and 2 "nested" adjectives.
    "If you have aries in rising, you tend to be loud and superficial.",
):
    parsetreex = get_parsetree(parser, text)
    parsetreex.pretty_print()
    simple_find_parallel_sentence7(parsetreex[0])

                                      ROOT                                      
                                       |                                         
                                       S                                        
          _____________________________|______________________________________   
        SBAR                           |    |            |                    | 
  _______|_____                        |    |            |                    |  
 |             S                       |    |            |                    | 
 |    _________|____                   |    |            |                    |  
 |   |              VP                 |    |            |                    | 
 |   |    __________|___               |    |            |                    |  
 |   |   |              NP             |    |            |                    | 
 |   |   |      ________|___           |    |            |                    |  
 |   |   |     |      

`simple_find_parallel_sentence7()` gives us a splitting function that covers the common cases, which is great. However, there is still a large, unknown number of complex sentence structures that it does not cover. We have to decide whether splitting is important enough to justify spending more time refining the function. If the common, simple cases are all we need to cover, we can stop refining it here and proceed to other aspects/stages of the larger project.

In [183]:
def test(parser, func, text, pprint=False, expected=None):
    """
    Parses a sentence, runs a splitting function on the parse and prints the outcome.

    :param parser: parser object that is handed to get_parsetree().
    :param func: callable that is applied to the first child of the tree returned by get_parsetree().
    :param text: str. the sentence to parse.
    :param pprint: when truthy, the parse tree is drawn before func is called.
    :param expected: optional expected return value of func. When it is not None,
        'PASSING' or 'FAILING' is printed depending on whether func's result equals it.
    :returns: None. Everything is reported through print().
    """
    print('input:')
    print(text + '\n>>>')
    parse = get_parsetree(parser, text)
    if pprint:
        parse.pretty_print()

    result = func(parse[0])
    print('output:')
    print(result)
    print()

    # Compare against None explicitly: a falsy expectation such as [] or ''
    # is a legitimate expected value and must still be checked.
    if expected is not None:
        print('PASSING\n' if result == expected else 'FAILING\n')

In [184]:
'''
TEST CASES - simple
'''

# Every sentence goes through the test() helper with version 7 of the splitter.
for text in (
    # parallel NN -> PASS
    "Bobby is a good father and great friend.",
    # parallel "negative" NN -> PASS
    "Bobby is not a good father and great friend.",
    # parallel ADJP -> PASS
    "Bobby is very handsome and super smart",
    # parallel ADJP AND JJ -> PASS
    "Bobby is very handsome and smart",
    # parallel VP -> PASS
    "You are quick to grasp new concepts and equally quick to lose interest in an idea or project once your curiosity has been satisfied.",
):
    test(parser, simple_find_parallel_sentence7, text)


'''
MORE TEST CASES - with conditional statements.
'''

for text in (
    # conditional and 2 adjectives -> PASS
    "If you have aries in rising, you are loud and superficial.",
    # conditional and 2 "nested" adjectives -> FAIL.
    "If you have aries in rising, you tend to be loud and superficial",
):
    test(parser, simple_find_parallel_sentence7, text)

input:
Bobby is a good father and great friend.
>>>
NP found.
NP and VP found on same level.
found NP under VP.
found parallel NP
new sentences are :
[] ['Bobby'] [['is']] ['a', 'good', 'father']
[] ['Bobby'] [['is']] ['great', 'friend']

output:
None

input:
Bobby is not a good father and great friend.
>>>
NP found.
NP and VP found on same level.
found NP under VP.
found parallel NP
new sentences are :
[] ['Bobby'] [['is'], ['not']] ['a', 'good', 'father']
[] ['Bobby'] [['is'], ['not']] ['great', 'friend']

output:
None

input:
Bobby is very handsome and super smart
>>>
NP found.
NP and VP found on same level.
found ADJP under VP.
found parallel ADJP
new sentences are :
[] ['Bobby'] [['is']] ['very', 'handsome']
[] ['Bobby'] [['is']] ['super', 'smart']

output:
None

input:
Bobby is very handsome and smart
>>>
NP found.
NP and VP found on same level.
found ADJP under VP.
found parallel ADJP
new sentences are :
[] ['Bobby'] [['is']] ['handsome']
[] ['Bobby'] [['is']] ['smart']

output:

In [173]:
# Conditional sentence whose adjectives are nested below "tend to be".
# The tree is drawn here for inspection; test() parses the same text again
# before running version 7 of the splitter on it.
text = 'if you are A, then you tend to be funny and smart.'
tmp_t = get_parsetree(parser, text)
tmp_t.pretty_print()
test(parser, simple_find_parallel_sentence7, text)

                                  ROOT                                      
                                   |                                         
                                   S                                        
          _________________________|______________________________________   
         |                |   |    |             VP                       | 
         |                |   |    |     ________|____                    |  
        SBAR              |   |    |    |             S                   | 
  _______|____            |   |    |    |             |                   |  
 |            S           |   |    |    |             VP                  | 
 |    ________|___        |   |    |    |     ________|____               |  
 |   |            VP      |   |    |    |    |             VP             | 
 |   |         ___|___    |   |    |    |    |    _________|___           |  
 |   NP       |       NP  |  ADVP  NP   |    |   |            ADJP    

In [174]:
def find_sub(t):
    """
    Recursively separates the words of a tree into shared words and parallel subcomponents.

    A node that is taller than a pre-terminal and has a 'CC' child is treated as a
    coordination: the leaves of each of its non-'CC' children become one subcomponent
    and the search does not descend any further below that node. For every other node
    the children are visited in order: sub-trees are searched recursively and bare
    leaves are collected as shared ("common") words.

    :param t: an nltk.tree.Tree
    :returns: 2-tuple (commons, subcomponents).
        commons is a flat list of the leaves found outside any coordination;
        subcomponents is a list of leaf lists, one per coordinated part.
    """
    commons = []
    subcomponents = []

    # When height == 2 only leaves (strings) are left below the node, so only
    # taller nodes are checked for a 'CC' child.
    if t.height() > 2 and 'CC' in get_child_labels(t):
        for child in t:
            if child.label() != 'CC':
                subcomponents.append(child.leaves())
    else:
        for child in t:
            # isinstance (rather than an exact type comparison) so that Tree
            # subclasses are still descended into instead of being taken for leaves.
            if isinstance(child, Tree):
                child_commons, child_subcomponents = find_sub(child)
                commons.extend(child_commons)
                subcomponents.extend(child_subcomponents)
            else:
                # a leaf (a string type)
                commons.append(child)

    return commons, subcomponents

In [178]:
def _assemble_sentences(prefix_tokens, subsentences):
    """
    Render one sentence string per entry of ``subsentences``.

    :param prefix_tokens: list of tokens shared by every output sentence.
    :param subsentences: list of token lists, one per parallel part.
    :returns: list of strings; each is the prefix tokens followed by one part,
        joined with single spaces and terminated with '.'.
    """
    return [" ".join(list(prefix_tokens) + list(sent)) + '.' for sent in subsentences]


def simple_find_parallel_sentence8(t):
    """
    From a sentence, finds and generates parallel sub-sentences describing the same subject, if they exist.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and "You may become impatient with those who don't learn as quickly."

    Looks for parallel noun phrases (NP, NN), parallel verbs (VP, V) and parallel adjectives (ADJP, JJ).
    Parallel phrases means these phrases are children of the same node.
    Also takes parallel sentences (S) into account.
    Also takes conditions ("if" or SBAR) into account.

    This iteration uses a recursive approach (find_sub) to find the parallel parts once it finds the first
    VP co-occurring (parallel) with an NP.

    :param t: an nltk.tree.Tree whose direct children are the clause-level constituents
        (SBAR / S / NP / VP ...) to inspect.
    :returns: list of simpler sentences (strings, each terminated with '.').
    :raises Exception: 'Sentence structure not covered by function.' when ``t`` has neither two or more
        'S' children nor an 'NP' child together with a 'VP' child;
        'Sentence not decomposable.' when the VP contains no 'CC' to split on.
    """
    labels = get_child_labels(t)

    # conditions, e.g. "if you are x": the leaves of every SBAR child of t.
    preconditions = []
    for child in t:
        if child.label() == 'SBAR':
            preconditions.extend(child.leaves())

    if labels.count('S') >= 2:
        # Two or more clauses on the same level: each one already is a complete
        # sub-sentence, so no subject is prepended (only the preconditions).
        subsentences = [child.leaves() for child in t if child.label() == 'S']
        return _assemble_sentences(preconditions, subsentences)

    if 'NP' not in labels or 'VP' not in labels:
        raise Exception('Sentence structure not covered by function.')

    # First NP is taken as the subject, first VP as the predicate to split.
    subject = t[labels.index('NP')].leaves()
    predicate = t[labels.index('VP')]

    # Only attempt a split when a coordinating conjunction occurs somewhere
    # under the VP; pos() yields (word, tag) pairs for every leaf.
    if not any(tag == 'CC' for _, tag in predicate.pos()):
        raise Exception('Sentence not decomposable.')

    # recurse until 'CC' child is found or leaf is reached, then accumulate any parallel parts found.
    # `qualifiers` are the tokens shared by all parallel parts (e.g. "tend to be").
    qualifiers, subsentences = find_sub(predicate)
    return _assemble_sentences(preconditions + subject + qualifiers, subsentences)

In [185]:
# Conditional ("if ...") sentence whose two parallel adjectives each carry their
# own adverb ("very funny" / "highly intelligent").
text = 'if you are A, then you tend to be very funny and highly intelligent.'
test(parser, simple_find_parallel_sentence8, text)

input:
if you are A, then you tend to be very funny and highly intelligent.
>>>
output:
['if you are A you tend to be very funny.', 'if you are A you tend to be highly intelligent.']



In [181]:
# Conditional sentence where the condition itself contains a conjunction
# ("A and B") in addition to the parallel adjectives in the main clause.
text = 'if you are A and B, then you tend to be very funny and highly intelligent.'
test(parser, simple_find_parallel_sentence8, text)

input:
 if you are A and B, then you tend to be very funny and highly intelligent. 
>>>
output:
['if you are A and B , then you tend to be very funny.', 'if you are A and B , then you tend to be highly intelligent.']


In [182]:
# Runs simple_find_parallel_sentence8 over a set of sentences via the `test`
# helper. The PASS/FAIL tag on each case is the author's manual verdict on the
# printed output; this cell itself contains no assertions.
'''
TEST CASES - simple
'''

# parallel NN -> PASS
text = "Bobby is a good father and great friend."
test(parser, simple_find_parallel_sentence8, text)

# parallel "negative" NN -> PASS
text = "Bobby is not a good father and great friend."
test(parser, simple_find_parallel_sentence8, text)

# parallel ADJP -> PASS
text = "Bobby is very handsome and super smart"
test(parser, simple_find_parallel_sentence8, text)

# parallel ADJP AND JJ -> FAIL
# The recorded output splits the adverb off as its own fragment ('Bobby is very.').
# pprint=True appears to make `test` also draw the parse tree, which shows a
# flat ADJP (RB JJ CC JJ) for this sentence.
text = "Bobby is very handsome and smart"
test(parser, simple_find_parallel_sentence8, text, pprint=True)

# parallel VP -> PASS
text = "You are quick to grasp new concepts and equally quick to lose interest in an idea or project once your curiosity has been satisfied."
test(parser, simple_find_parallel_sentence8, text)


'''
MORE TEST CASES - with conditional statements.
'''

# conditional and 2 adjectives -> PASS
text = "If you have aries in rising, you are loud and superficial."
test(parser, simple_find_parallel_sentence8, text)

# conditional and 2 "nested" adjectives -> PASS.
text = "If you have aries in rising, you tend to be loud and superficial"
test(parser, simple_find_parallel_sentence8, text)

input:
 Bobby is a good father and great friend. 
>>>
output:
['Bobby is a good father.', 'Bobby is great friend.']
input:
 Bobby is not a good father and great friend. 
>>>
output:
['Bobby is not a good father.', 'Bobby is not great friend.']
input:
 Bobby is very handsome and super smart 
>>>
output:
['Bobby is very handsome.', 'Bobby is super smart.']
input:
 Bobby is very handsome and smart 
>>>
      ROOT                        
       |                           
       S                          
   ____|____                       
  |         VP                    
  |     ____|______                
  NP   |          ADJP            
  |    |     ______|___________    
 NNP  VBZ   RB     JJ     CC   JJ 
  |    |    |      |      |    |   
Bobby  is  very handsome and smart

output:
['Bobby is very.', 'Bobby is handsome.', 'Bobby is smart.']
input:
 You are quick to grasp new concepts and equally quick to lose interest in an idea or project once your curiosity has been satisfie

In [146]:
# Test: one subject with two verb phrases joined by "and"; the second phrase
# itself contains further "or"/"and" conjunctions.
text = "You strive for perfection and can be quite the person to live with or to be around because your standards for yourself and others are so high."
test(parser, simple_find_parallel_sentence8, text)

input :  You strive for perfection and can be quite the person to live with or to be around because your standards for yourself and others are so high. 
>>>
NP found.
NP and VP found on same level (under same node).
new sentences are :
[] ['You'] [] ['strive', 'for', 'perfection']
[] ['You'] [] ['can', 'be', 'quite', 'the', 'person', 'to', 'live', 'with', 'or', 'to', 'be', 'around', 'because', 'your', 'standards', 'for', 'yourself', 'and', 'others', 'are', 'so', 'high']



In [147]:
# Test: two full clauses joined by "and"; the recorded output shows this input
# being handled by the ">=2 'S'" branch.
text = "You take slights very personally and though you may forgive a mistake by a friend or loved one, you never forget it."
test(parser, simple_find_parallel_sentence8, text)

input :  You take slights very personally and though you may forgive a mistake by a friend or loved one, you never forget it. 
>>>
>=2 'S' found on same level.
new sentences are :
[] ['You', 'take', 'slights', 'very', 'personally']
[] ['though', 'you', 'may', 'forgive', 'a', 'mistake', 'by', 'a', 'friend', 'or', 'loved', 'one', ',', 'you', 'never', 'forget', 'it']


In [148]:
# Test: two full clauses joined by "and", where the first clause also has a
# conjunction inside its subject ("senses and emotions"); the recorded output
# shows the ">=2 'S'" branch handling it.
text = "Your senses and emotions are quite strong and it would be wise for you to listen to your intuitive side as you can be quite psychic."
test(parser, simple_find_parallel_sentence8, text)

input :  Your senses and emotions are quite strong and it would be wise for you to listen to your intuitive side as you can be quite psychic. 
>>>
>=2 'S' found on same level.
new sentences are :
[] ['Your', 'senses', 'and', 'emotions', 'are', 'quite', 'strong']
[] ['it', 'would', 'be', 'wise', 'for', 'you', 'to', 'listen', 'to', 'your', 'intuitive', 'side', 'as', 'you', 'can', 'be', 'quite', 'psychic']
