This notebook demonstrates the usage of several NLTK `Tree` methods.

In [2]:
import nltk

In [3]:
from nltk.tree import Tree

In [4]:
# Build a tree directly with the constructor: a node labelled 1 whose children
# are the leaf 2, the subtree (3 4), and the leaf 5.
t = Tree(1, [2, Tree(3, [4]), 5])
print(t)

(1 2 (3 4) 5)


In [5]:
# Build a tree by parsing its bracketed string representation.
s = Tree.fromstring("(S (NP I) (VP (V saw) (NP him)))")
print(s)

(S (NP I) (VP (V saw) (NP him)))


In [6]:
# Render the same tree as an ASCII diagram instead of the bracketed form.
s.pretty_print()

     S         
  ___|___       
 |       VP    
 |    ___|___   
 NP  V       NP
 |   |       |  
 I  saw     him



In [7]:
# Bracketed constituency parse of the example sentence used in the rest of the
# notebook (presumably copied from an external parser's output -- TODO confirm source).
output = "(ROOT (S(NP (PRP You))(VP(VP (VBP grasp)(NP (NNS concepts))(ADVP (RB easily)))(CC and)(VP (MD may)(VP (VB become)(ADJP (JJ impatient))(PP (IN with)(NP(NP (DT those))(SBAR(WHNP (WP who))(S(VP (VBP do) (RB n't)(VP (VB learn)(ADVP (RB as) (RB quickly)))))))))))(. .)))"

In [8]:
# Parse the bracketed string into a Tree; printing it shows the indented bracketed form.
parsetree = Tree.fromstring(output)
print(parsetree)

(ROOT
  (S
    (NP (PRP You))
    (VP
      (VP (VBP grasp) (NP (NNS concepts)) (ADVP (RB easily)))
      (CC and)
      (VP
        (MD may)
        (VP
          (VB become)
          (ADJP (JJ impatient))
          (PP
            (IN with)
            (NP
              (NP (DT those))
              (SBAR
                (WHNP (WP who))
                (S
                  (VP
                    (VBP do)
                    (RB n't)
                    (VP (VB learn) (ADVP (RB as) (RB quickly)))))))))))
    (. .)))


In [8]:
# ASCII diagram of the full parse (wide: one column group per token).
parsetree.pretty_print()

                                                   ROOT                                               
                                                    |                                                  
                                                    S                                                 
  __________________________________________________|_______________________________________________   
 |                         VP                                                                       | 
 |            _____________|________                                                                |  
 |           |             |        VP                                                              | 
 |           |             |    ____|________                                                       |  
 |           |             |   |             VP                                                     | 
 |           |             |   |     ________|____________           

In [29]:
# Children are themselves Trees, so integer indexing can be chained to walk down the parse:
# ROOT -> S [0] -> VP [1] -> VP "may ..." [2] -> VP "become ..." [1] -> PP [2]
parsetree[0][1][2][1][2].pretty_print()

       PP                                      
  _____|____                                    
 |          NP                                 
 |      ____|________                           
 |     |            SBAR                       
 |     |     ________|_____                     
 |     |    |              S                   
 |     |    |              |                    
 |     |    |              VP                  
 |     |    |     _________|____                
 |     |    |    |   |          VP             
 |     |    |    |   |      ____|___            
 |     NP  WHNP  |   |     |       ADVP        
 |     |    |    |   |     |     ___|______     
 IN    DT   WP  VBP  RB    VB   RB         RB  
 |     |    |    |   |     |    |          |    
with those who   do n't  learn  as      quickly



In [27]:
# label() returns the node's own category; for the subtree selected above it is 'PP'.
parsetree[0][1][2][1][2].label()

'PP'

In [28]:
# List the grammar productions (parent -> children) that occur in this tree.
parsetree.productions()

[ROOT -> S,
 S -> NP VP .,
 NP -> PRP,
 PRP -> 'You',
 VP -> VP CC VP,
 VP -> VBP NP ADVP,
 VBP -> 'grasp',
 NP -> NNS,
 NNS -> 'concepts',
 ADVP -> RB,
 RB -> 'easily',
 CC -> 'and',
 VP -> MD VP,
 MD -> 'may',
 VP -> VB ADJP PP,
 VB -> 'become',
 ADJP -> JJ,
 JJ -> 'impatient',
 PP -> IN NP,
 IN -> 'with',
 NP -> NP SBAR,
 NP -> DT,
 DT -> 'those',
 SBAR -> WHNP S,
 WHNP -> WP,
 WP -> 'who',
 S -> VP,
 VP -> VBP RB VP,
 VBP -> 'do',
 RB -> "n't",
 VP -> VB ADVP,
 VB -> 'learn',
 ADVP -> RB RB,
 RB -> 'as',
 RB -> 'quickly',
 . -> '.']

In [43]:
def get_child_labels(t):
    """
    Collect the labels of the direct children of a tree.

    Children that are leaves (plain tokens such as strings or ints, which have
    no ``label()`` method) contribute ``None`` instead of raising
    ``AttributeError``. One entry is produced per child, so position ``i`` of
    the result always corresponds to ``t[i]`` and ``labels.index(...)`` can be
    used to index back into ``t``.

    :param t: an nltk.tree.Tree (any iterable of children works)
    :returns: list with one entry per child: the child's label, or None for a leaf.
    """
    labels = []
    for child in t:
        # Duck-typed check: subtrees expose label(); leaf tokens do not.
        label_method = getattr(child, 'label', None)
        labels.append(label_method() if callable(label_method) else None)

    return labels

In [62]:
def simple_find_parallel_sentence(t):
    """
    From a sentence, finds sub-sentences describing the same subject in parallel.

        e.g. "You grasp concepts easily and may become impatient with those who don't learn as quickly."
        can be broken down into "You grasp concepts easily" and "You may become impatient with those who don't learn as quickly."

    Only the direct children of ``t`` are inspected: the first 'NP' child is
    taken as the subject, and the first 'VP' child (or, when there is no 'VP'
    child at all, the first 'AP' child) is searched for two or more direct
    children carrying that same label. Progress is reported with ``print``.

        :param t: an nltk.tree.Tree whose direct children include the subject NP
                  (e.g. the S node, not ROOT)
        :returns: list of token lists, one per parallel sub-sentence, each being
                  the subject's leaves followed by that phrase's leaves. Empty
                  when ``t`` has no 'NP' child or no parallel phrases are found.
    """
    labels = get_child_labels(t)

    if 'NP' not in labels:
        print('Sentence may be invalid.')
        return []

    subject = t[labels.index('NP')].leaves()
    print('NP found.')

    # Phrase labels are tried in order and only the first one present among the
    # children is examined (same precedence as an if/elif chain).
    # NOTE(review): the example parse in this notebook labels adjective phrases
    # 'ADJP'; confirm that 'AP' is the label intended here.
    candidates = (
        ('VP', 'found parallel VP subssentences.'),
        ('AP', 'found parallel AP sub-sentences.'),
    )

    predicates = []
    for phrase_label, message in candidates:
        if phrase_label not in labels:
            continue
        phrase = t[labels.index(phrase_label)]
        inner_labels = get_child_labels(phrase)
        if inner_labels.count(phrase_label) >= 2:
            print(message)
            # Pair each child with its already-computed label rather than
            # calling child.label() again on the children.
            predicates = [
                child.leaves()
                for inner_label, child in zip(inner_labels, phrase)
                if inner_label == phrase_label
            ]
        break

    print('new sentences are :')
    for predicate in predicates:
        print(subject, predicate)

    return [subject + predicate for predicate in predicates]
    

In [63]:
# Pass the S subtree rather than ROOT: the function inspects the direct
# children of its argument for NP/VP, and ROOT's only child is S.
simple_find_parallel_sentence(parsetree[0])

NP found.
found parallel VP subssentences.
new sentences are :
['You'] ['grasp', 'concepts', 'easily']
['You'] ['may', 'become', 'impatient', 'with', 'those', 'who', 'do', "n't", 'learn', 'as', 'quickly']
