# Learn syntax with Python

## Set up

Dependencies:
- nltk
- ghostscript
- imagemagick

In [1]:
import os
import subprocess

import nltk
from nltk.draw import TreeView

## Define grammar

In [2]:

# Toy context-free grammar. A PP may attach either inside an NP
# (NP -> Det N PP) or to a VP (VP -> VP PP).
grammar = nltk.CFG.fromstring(
    """
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
"""
)

## Simple parser

In [10]:
class SimpleParser(nltk.ChartParser):
    """Chart parser with a helper that collects every parse of a sentence.

    The constructor is inherited unchanged from ``nltk.ChartParser``, so
    ``SimpleParser(grammar)`` keeps working and any further arguments the
    base class accepts can be passed through as well.
    """

    def parse_sent(self, sent, print_tree=False):
        """Return all parse trees found for a tokenised sentence.

        Args:
            sent: The sentence as a sequence of word tokens; it is handed
                to ``self.parse`` as-is.
            print_tree: When true, print each tree through its
                ``pretty_print`` method.

        Returns:
            list: Every tree produced by ``self.parse`` (empty if the
            sentence has no parse).
        """
        trees = list(self.parse(sent))
        if print_tree:
            for tree in trees:
                tree.pretty_print()
        return trees


parser = SimpleParser(grammar)

Some utilities for saving parse trees as PNG images.

In [22]:
def save_trees(trees, save_dir):
    """Render each tree to ``<save_dir>/tree_<n>.png`` (n starts at 1).

    Every tree is first written as a PostScript file through an NLTK
    ``TreeView``, converted to PNG with ``ps_to_png``, and the intermediate
    PostScript file is then removed.

    Args:
        trees: Iterable of trees accepted by ``TreeView``.
        save_dir: Directory that receives the images; it is created when
            missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    for number, tree in enumerate(trees, start=1):
        filename = "tree_{}".format(number)
        ps_path = os.path.join(save_dir, filename + ".ps")
        print(ps_path)
        png_path = os.path.join(save_dir, filename + ".png")

        view = TreeView(tree)
        try:
            view._cframe.print_to_file(ps_path)
        finally:
            # Close the viewer window; otherwise one window stays open
            # for every tree that is saved.
            view.destroy()

        ps_to_png(ps_path, png_path)
        os.remove(ps_path)

def ps_to_png(ps_file, png_file, density=300, background="white"):
    """Convert a PostScript file to PNG with the ``convert`` command.

    The command is run without a shell and with an explicit argument list,
    so paths containing spaces or shell metacharacters are passed through
    verbatim.

    Args:
        ps_file: Path of the PostScript input.
        png_file: Path of the PNG output.
        density: Value for ``convert``'s ``-density`` option.
        background: Value for ``convert``'s ``-background`` option.

    Raises:
        FileNotFoundError: If the ``convert`` executable cannot be found.
        subprocess.CalledProcessError: If ``convert`` exits with a non-zero
            status.
    """
    cmd = [
        "convert",
        "-density", str(density),
        "-background", str(background),
        "-flatten",
        str(ps_file),
        str(png_file),
    ]
    # check=True surfaces a failed conversion instead of silently ignoring it.
    subprocess.run(cmd, check=True)

## Parse sentences

In [11]:
# Split on single spaces and drop the empty strings that repeated spaces
# would leave behind.
sent = [word for word in "I shot an elephant in my pajamas".split(" ") if word]
print(sent)

['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']


In [12]:
# Collect every parse of the tokenised sentence; print_tree=True also prints
# each tree.
trees = parser.parse_sent(sent, print_tree=True)

     S                                       
  ___|______________                          
 |                  VP                       
 |         _________|__________               
 |        VP                   PP            
 |    ____|___              ___|___           
 |   |        NP           |       NP        
 |   |     ___|_____       |    ___|_____     
 NP  V   Det        N      P  Det        N   
 |   |    |         |      |   |         |    
 I  shot  an     elephant  in  my     pajamas

     S                                   
  ___|__________                          
 |              VP                       
 |    __________|______                   
 |   |                 NP                
 |   |     ____________|___               
 |   |    |     |          PP            
 |   |    |     |       ___|___           
 |   |    |     |      |       NP        
 |   |    |     |      |    ___|_____     
 NP  V   Det    N      P  Det        N   
 |   |    |     |    

## Save to images

The save directory should be an absolute path.

In [None]:
# An empty path makes os.makedirs fail, so default to an absolute "trees"
# directory under the current working directory; change it as needed.
save_dir = os.path.abspath("trees")
save_trees(trees, save_dir)