# Red Hat Libs

From an arbitrary English text, produce a __Mad Libs__-style result: some words in the original text are replaced with words of the same part of speech drawn from a dictionary. The number of blanks — that is, the number of words that will be replaced with words from the dictionary — is controlled by the **Madness** setting.

## What's going on here?

This example uses the NLTK library to [tokenize and tag parts of speech](#Tag-the-parts-of-speech). Next, we [choose which words to turn into blanks](#Create-blanks-in-the-tagged-text). Then we [choose replacement words from the dictionary](#Choose-replacement-words-from-the-dictionary) and [display the resulting combined text](#Display-the-original-text-with-the-chosen-substitutions).

In [406]:
pip install nltk

You should consider upgrading via the 'pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


In [407]:
# Set up the environment
import nltk

import ipywidgets as widgets
from ipywidgets import interact
from IPython import get_ipython
from IPython.display import display
from IPython.display import Javascript

# Defaults used further down: the initial contents of the source-text box and
# the word-list file that load_dictionary() reads when no name is given.
defaultSourceText = """Oh say can you see, by the dawn's early light, what so proudly we hailed at the twilight's last gleaming?"""
defaultDictionaryName = "shakespeareDict.txt"

# Download the NLTK resources only when get_ipython() returns a truthy value,
# i.e. when an IPython shell is active.
# NOTE(review): presumably 'punkt' backs nltk.word_tokenize and
# 'averaged_perceptron_tagger' backs nltk.pos_tag — confirm against NLTK docs.
if get_ipython():
    nltk.download('tagsets')
    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    
# Populate the user-friendly terminology for parts of speech.
# Each useful line of the file has the form "<tag> : <friendly name>".
posDict = {}
with open("nltk-tagsets-pos.txt") as f:
    for line in f:
        row = line.split(" : ")
        if len(row) < 2:
            # Blank or separator-less line (e.g. an empty line at the end of
            # the file): skip it instead of failing with IndexError on row[1].
            continue
        key = row[0]
        val = row[1]
        posDict[key] = val.strip()
      
def human_readable_pos_tagged_tuple( tupe ): # a tuple
    """Swap the raw tag of a (word, tag) pair for its friendly name.

    The friendly name is looked up in posDict; a tag with no entry there is
    reported as "stet". The word is passed through untouched.
    """
    token, raw_tag = tupe
    return ( token, posDict.get(raw_tag, "stet") )

# Shared, mutable working lists. The functions below update them in place
# (clear + extend) rather than rebinding the names.
pos_tagged_list = []
template = []
filled_template = []

# set up UI elements
madnessSlider = widgets.IntSlider(value=3, min=0, max=10, step=1, description='Madness:')
sourceTextInput = widgets.Textarea(description='Source text:', value=defaultSourceText)
sourceTextInput.layout.width = '100%'
sourceTextInput.layout.height = '130px'

[nltk_data] Downloading package tagsets to /opt/app-
[nltk_data]     root/src/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package punkt to /opt/app-
[nltk_data]     root/src/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /opt/app-root/src/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


## Tag the parts of speech

In [408]:
import time

# Created here, ahead of the cells that lay them out and register their click
# handlers, so that on_tag_button_clicked can already refer to blankButton.
blankButton = widgets.Button(description="Click to choose blanks (and run all subsequent steps)")
# Button later bound to show_result; note it is created without a description.
displayButton = widgets.Button()

def tag_parts_of_speech(sourceText=defaultSourceText):
    """Tokenize the text and label every token with a friendly part of speech.

    The text is split with nltk.word_tokenize and tagged with nltk.pos_tag;
    each resulting pair is then passed through
    human_readable_pos_tagged_tuple.

    Returns:
        A list of (token, label) tuples in the order the tokens appear.
    """
    tokens = nltk.word_tokenize(sourceText)
    return [human_readable_pos_tagged_tuple(pair) for pair in nltk.pos_tag(tokens)]


# Show the input controls: the source-text box and the Madness slider.
display(sourceTextInput)
display(madnessSlider)

# Button that starts the pipeline, displayed together with the Output widget
# that its click handler prints into.
tagButton = widgets.Button(description="Click to tag the source text (and run all subsequent steps)")
tagButton.layout.width = '400px'
output1 = widgets.Output()
display(tagButton, output1)

def on_tag_button_clicked(b):
    """Click handler: re-tag the source text, then cascade to blank selection.

    The shared pos_tagged_list is refreshed in place from the current contents
    of the source-text box. The timestamp, the tagged list and the chained
    call to on_blank_button_clicked all run inside output1's context.
    The argument b is not used.
    """
    # Slice assignment swaps the contents while keeping the same list object.
    pos_tagged_list[:] = tag_parts_of_speech(sourceTextInput.value)
    with output1:
        print("--- " + time.ctime())
        print(pos_tagged_list)
        # Run the next pipeline step directly instead of re-executing cells.
        on_blank_button_clicked(blankButton)


tagButton.on_click(on_tag_button_clicked)


## Create blanks in the tagged text

The **Madness** setting controls how many blanks will be created.

In [465]:
# Show the Madness slider again in this cell; it was already displayed once
# alongside the source-text box.
display(madnessSlider)

import random

def is_non_stet_pos_tuple( tupe ):
    """Return 1 if the entry's part of speech (element 1) is not "stet", else 0."""
    return 0 if tupe[1] == "stet" else 1

def indexed_pos_tuple(tupe, num ):
    """Pair a position number with the entry's part of speech (element 1).

    The first element of tupe (the word) is dropped from the result.
    """
    part_of_speech = tupe[1]
    return num, part_of_speech

def create_blanks_from_pos_tagged_list(madness):
    """Pick which tagged words become blanks.

    Candidates are the entries of the shared pos_tagged_list whose part of
    speech is not "stet". A fraction madness/10 of them, rounded down, is
    chosen at random.

    Args:
        madness: Blank density. Values above 10 are treated as 10; values of
            0 or below produce no blanks.

    Returns:
        A list of (word_index, part_of_speech) tuples sorted by word_index.
    """
    indexed_pos_list = (
        indexed_pos_tuple(tupe, idx) for idx, tupe in enumerate(pos_tagged_list)
    )
    candidates = [entry for entry in indexed_pos_list if is_non_stet_pos_tuple(entry)]

    # error avoidance: never request more blanks than there are candidates
    madness = min(madness, 10)
    if madness <= 0:
        return []

    # Integer arithmetic keeps the count exact. The float form
    # int(madness / 10 * n) can land just below a whole number
    # (0.7 * 90 == 62.99999999999999) and silently drop one blank.
    num_desired_blanks = int(madness * len(candidates) // 10)
    blanks = random.sample(candidates, num_desired_blanks)
    blanks.sort(key=lambda tup: tup[0])
    return blanks

# Size the blank-selection button and show it together with the Output widget
# that on_blank_button_clicked prints into.
blankButton.layout.width = '350px'
output2 = widgets.Output()
display(blankButton, output2)

def on_blank_button_clicked(b):
    """Click handler: choose new blanks, then cascade to the final display.

    The shared template list is refreshed in place using the slider's current
    Madness value. The timestamp, the template and the chained call to
    show_result all run inside output2's context. The argument b is not used.
    """
    # Slice assignment swaps the contents while keeping the same list object.
    template[:] = create_blanks_from_pos_tagged_list(madnessSlider.value)
    with output2:
        print("--- " + time.ctime())
        print(template)
        # Run the next pipeline step directly instead of re-executing cells.
        show_result(displayButton)


blankButton.on_click(on_blank_button_clicked)

## Choose replacement words from the dictionary

In [474]:
# Replacement words keyed by friendly part-of-speech name; filled in place by
# load_dictionary().
dictionary = {}

# load the dictionary
def load_dictionary(file_name=defaultDictionaryName):
    """Load the replacement-word dictionary from a text file.

    Each entry line has the form "<part of speech> : word1, word2, ...".
    Lines consisting of "<dictionary>" or "</dictionary>", blank lines, and
    lines without the " : " separator are ignored. The shared dictionary is
    replaced in place, and one summary line per part of speech is printed.

    Args:
        file_name: Path of the dictionary file to read.

    Raises:
        OSError: If the file cannot be opened.
    """
    new_dict = {}
    with open(file_name) as f:
        for line in f:
            # Strip the line ending first; otherwise the last word of every
            # list would carry a trailing newline, and the marker test below
            # would depend on the exact line terminator.
            line = line.strip()
            if not line or line in ("<dictionary>", "</dictionary>"):
                continue
            row = line.split(" : ")
            if len(row) < 2:
                # No separator: not an entry line, so there is nothing to load.
                continue
            new_dict[row[0]] = row[1].split(", ")
    dictionary.clear()
    dictionary.update(new_dict)
    for entry in dictionary:
        print("Dictionary " + file_name + " has " + str(len(dictionary[entry])) + " entries for " + entry)

load_dictionary()

Dictionary shakespeareDict.txt has 74 entries for exclamation
Dictionary shakespeareDict.txt has 262 entries for comparitive adjective
Dictionary shakespeareDict.txt has 2450 entries for verb, past participle
Dictionary shakespeareDict.txt has 4875 entries for plural noun
Dictionary shakespeareDict.txt has 2516 entries for verb (3rd person singular, present tense)
Dictionary shakespeareDict.txt has 12982 entries for noun (singular)
Dictionary shakespeareDict.txt has 7695 entries for proper noun (singular)
Dictionary shakespeareDict.txt has 651 entries for preposition
Dictionary shakespeareDict.txt has 1986 entries for past tense verb
Dictionary shakespeareDict.txt has 221 entries for proper noun (plural)
Dictionary shakespeareDict.txt has 950 entries for stet
Dictionary shakespeareDict.txt has 8947 entries for adjective
Dictionary shakespeareDict.txt has 15 entries for superlative adverb
Dictionary shakespeareDict.txt has 51 entries for comparative adverb
Dictionary shakespeareDict.txt

In [475]:
def choose_replacements_from_dictionary():
    """Choose a random replacement word for every blank in the template.

    For each part of speech requested by the shared template, words are drawn
    from the shared dictionary without repetition. The shared filled_template
    is then replaced in place with (word_index, replacement, part_of_speech)
    tuples in template order.

    A blank is left out of filled_template when the dictionary has no entry
    for its part of speech, or has fewer words than there are blanks of that
    kind. Previously those cases raised KeyError or ValueError.
    """
    # How many blanks were requested for each part of speech.
    pos_counts = {}
    for tupe in template:
        pos = tupe[1]
        pos_counts[pos] = pos_counts.get(pos, 0) + 1

    # Draw all words for one part of speech at once so they do not repeat.
    chosen_pos = {}
    for requested_pos, count in pos_counts.items():
        available = dictionary.get(requested_pos, [])
        chosen_pos[requested_pos] = random.sample(available, min(count, len(available)))

    new_choices = []
    for tupe in template:
        remaining = chosen_pos[tupe[1]]
        if remaining:
            new_choices.append( ( tupe[0], remaining.pop(), tupe[1] ) )
        # else: no word left for this part of speech; skip the blank.
    filled_template.clear()
    filled_template.extend(new_choices)

choose_replacements_from_dictionary()
print(filled_template)

[(0, "'D", 'exclamation'), (6, 'DROWN', 'preposition'), (8, 'TOPGALLANT', 'noun (singular)'), (10, 'RAIN', 'adjective'), (11, "WEAR'ST", 'noun (singular)'), (14, 'ADVERSELY', 'adverb'), (15, 'TILL', 'adverb'), (18, 'UNDOES', 'preposition'), (20, 'SLIMY', 'noun (singular)'), (22, 'UNPOLISHED', 'adjective'), (23, 'HANDSOMENESS', 'noun (singular)')]


## Display the original text with the chosen substitutions

In [478]:
from IPython.display import Markdown, display

# Markup wrapped around a blank that has no replacement word chosen yet.
blankDelim = "<u>&nbsp;&nbsp;&nbsp;"
blankEndDelim="&nbsp;&nbsp;&nbsp;</u>"
def fill_pos_tagged_list_with_filled_template():
    """Rebuild the text from pos_tagged_list, applying filled_template.

    Words whose position appears in the shared filled_template are replaced
    by element 1 of the matching entry. A 2-element entry (no replacement
    chosen) is additionally wrapped in the blank delimiters. All other words
    are copied unchanged.

    Returns:
        The resulting text, with a single space after every word (including
        the last one), or "" when pos_tagged_list is empty.
    """
    # Index the entries by word position. This also covers an empty
    # filled_template, which previously raised IndexError on the first word
    # because the "next blank" position defaulted to 0.
    entries_by_index = {entry[0]: entry for entry in filled_template}

    pieces = []
    for wordIdx, tupe in enumerate(pos_tagged_list):
        entry = entries_by_index.get(wordIdx)
        if entry is None:
            pieces.append(tupe[0] + " ")
        elif len(entry) == 2: # it's an empty template, no filled words chosen
            pieces.append(blankDelim + entry[1] + blankEndDelim + " ")
        else:
            pieces.append(entry[1] + " ")

    return "".join(pieces)

output3 = widgets.Output()

def show_result(b):
    """Choose fresh replacement words and display the resulting text.

    Calls choose_replacements_from_dictionary(), renders the combined text
    with fill_pos_tagged_list_with_filled_template(), prints a timestamp line
    and displays the text as Markdown together with output3.

    Args:
        b: The clicked button when used as a click handler; not used.
    """
    choose_replacements_from_dictionary()
    txt = fill_pos_tagged_list_with_filled_template()
    # print() writes every positional argument, so passing output3 here used
    # to append the widget's repr ("Output()") to the timestamp line.
    print("--- " + time.ctime())
    display(Markdown(txt), output3)

displayButton.on_click(show_result)

In [479]:
# Render once right away. show_result does not use its argument, so None
# stands in for the button.
show_result(None)

--- Wed Dec 16 10:15:57 2020 Output()


HOLD BOWES can you FAWNETH , by the SYNAGOGUE 's early PERT , what so proudly we hailed BAD'ST the twilight 's UNCAUGHT SHIN ? 