First, run the [setup notebook](./00-setup.ipynb).

In [1]:
# --- Imports ---
import nltk, html
from nltk import pos_tag, word_tokenize, RegexpParser
from IPython.display import display, HTML
import ipywidgets as widgets
import spacy

# --- spacy setup ---
# Load the 'en_core_web_sm' spaCy pipeline once, at module level.
# analyze_sentence() calls it on each sentence and reads every token's
# text and coarse POS tag (token.pos_) from the result.
nlp_spacy = spacy.load('en_core_web_sm')

In [None]:
# --- Define grammar for noun phrases ---
# NLTK chunk grammar with a single rule labelled NP. The tag pattern matches
# an optional DET, then any number of ADJ, then exactly one NOUN. The tag
# names are the spaCy `token.pos_` values that analyze_sentence() feeds to
# the parser. Because the rule ends at the first NOUN and does not mention
# PROPN, runs of consecutive nouns and proper nouns are not merged into a
# single chunk.
# The grammar text is also shown verbatim (HTML-escaped) in the rendered
# report, so editing it changes the displayed heading as well.
grammar = r"""
  NP: {<DET>?<ADJ>*<NOUN>}
"""
parser = RegexpParser(grammar)

# --- Example sentences ---
# Passed as `options` to the Dropdown below: each key is a menu label and
# each value is the sentence handed to analyze_sentence() on selection.
examples = {
    "1️⃣  The classic fox": "The quick brown fox jumps over the lazy dog.",
    "2️⃣  The book on the table": "The old book on the wooden table belongs to Sarah.",
    "3️⃣  The AI revolution": "Modern artificial intelligence systems transform industries worldwide.",
    "4️⃣  The conference scene": "Researchers presented innovative papers at the NLP conference.",
    "5️⃣  The riverbank": "The children played near the quiet river bank under the blue sky."
}

# --- Output widget for dynamic display ---
# Shared output area: on_sentence_change() clears it and renders each new
# analysis inside it, and it is displayed next to the dropdown.
output = widgets.Output()

# --- Sentence analysis function ---
def analyze_sentence(sentence):
    """Tag, chunk and render one sentence as an HTML report.

    The sentence is processed with the module-level spaCy pipeline
    ``nlp_spacy``; the resulting ``(text, pos_)`` pairs are chunked by the
    module-level ``parser``. The report is emitted with ``display(HTML(...))``
    and contains the sentence, a word/POS-tag table, the parse tree as SVG,
    and a list of the extracted ``NP`` chunks.

    Args:
        sentence: The text to analyse.

    Returns:
        None. The report is shown via ``display``.
    """
    doc = nlp_spacy(sentence)
    pos_tags = [(token.text, token.pos_) for token in doc]
    tree = parser.parse(pos_tags)

    # Build POS-tag table (left column).
    # Everything that originates from the sentence is HTML-escaped so that
    # characters such as '<', '>' or '&' are shown literally instead of being
    # interpreted as markup (the grammar below was already escaped).
    table_rows = "".join(
        f"<tr><td>{html.escape(word)}</td><td>{html.escape(tag)}</td></tr>"
        for word, tag in pos_tags
    )
    table_html = (
        "<table border='1' style='border-collapse:collapse;'>"
        "<tr><th>Word</th><th>POS Tag</th></tr>"
        f"{table_rows}</table>"
    )

    # Extract noun phrases: join the words of every subtree labelled NP.
    noun_phrases = [
        " ".join(word for word, _tag in subtree.leaves())
        for subtree in tree.subtrees(filter=lambda t: t.label() == 'NP')
    ]
    phrase_items = "".join(
        f"<li>{html.escape(phrase)}</li>" for phrase in noun_phrases
    )

    # Parse tree as SVG (already markup, so it is inserted unescaped).
    tree_html = tree._repr_svg_()

    # Combined HTML layout: table on the left, tree on the right.
    html_output = f"""
    <p><b>{html.escape(sentence)}</b></p>
    <div style="display: flex; gap: 40px;">
      <div style="flex: 1;">{table_html}</div>
      <div style="flex: 3;">{tree_html}</div>
    </div>
    <br>
    <h4>Extracted Noun Phrases: {html.escape(grammar)}</h4>
    <ul>{phrase_items}</ul>
    """
    display(HTML(html_output))

# --- Handler for dropdown changes ---
def on_sentence_change(change):
    """Re-render the analysis when the dropdown's selected value changes.

    Args:
        change: Change notification dict from the widget. Only entries with
            ``type == 'change'`` and ``name == 'value'`` are acted on; the
            newly selected sentence is read from ``change['new']``.

    Returns:
        None. Any other notification is ignored.
    """
    is_value_change = change['type'] == 'change' and change['name'] == 'value'
    if not is_value_change:
        return

    # Render inside the shared output widget, replacing the previous report.
    with output:
        output.clear_output(wait=True)
        analyze_sentence(change['new'])

# --- Dropdown widget ---
sentence_selector = widgets.Dropdown(
    options=examples,
    value=None,
    description='Select Sentence:',
    style={'description_width': '150px'},
    layout=widgets.Layout(width='700px')
)
sentence_selector.observe(on_sentence_change)

# --- Display interface ---
display(HTML(f"<h2>Noun Phrase Chunking with NLTK RegexpParser</h2>"))
display(sentence_selector, output)


Dropdown(description='Select Sentence:', layout=Layout(width='700px'), options={'1️⃣  The classic fox': 'The q…

Output()

---