# Red Hat Libs

From an arbitrary English text, produce a __Mad Libs__-style result: replace some words in the original text with words of the same part of speech drawn from a dictionary. The number of blanks — that is, the number of words that will be replaced with words from the dictionary — is controlled by the **Madness** setting.

## To begin - first choose "Cell > Run All" from the menu, then [Click here to play](#Play-Red-Hat-Libs)

## What's going on here?

This Jupyter Notebook uses the NLTK library to [tokenize and tag parts of speech](#Under-the-hood:-Tag-the-parts-of-speech). Next, we [choose which words to turn into blanks](#Under-the-hood:-Create-blanks-in-the-tagged-text). Then we [choose replacement words from the dictionary](#Under-the-hood:-Choose-replacement-words-from-the-dictionary) and [display the resulting combined text](#Under-the-hood:-Display-the-original-text-with-the-chosen-substitutions).

In [None]:
pip install nltk

In [None]:
# Set up the environment
import nltk

import ipywidgets as widgets
from IPython.display import display

# Text pre-filled into the source text box, and the name of the default dictionary file.
defaultSourceText = """Oh say can you see, by the dawn's early light, what so proudly we hailed at the twilight's last gleaming?"""
defaultDictionaryName = "shakespeareDict.txt"

# Fetch the NLTK data packages used for tokenizing and part-of-speech tagging.
# Only attempted when running inside IPython/Jupyter.
if get_ipython():
    nltk.download('tagsets')
    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    # The notebook installs an unpinned NLTK; recent releases look up these
    # resource names for word_tokenize() and pos_tag() instead of the two above.
    nltk.download('punkt_tab')
    nltk.download('averaged_perceptron_tagger_eng')
    
# populate the user-friendly terminology for parts of speech
# Each usable line of nltk-tagsets-pos.txt looks like "<tag> : <description>".
posDict = {}
with open("nltk-tagsets-pos.txt") as f:
    for line in f:
        row = line.split(" : ")
        if len(row) < 2:
            # Blank or separator-less line (e.g. a trailing newline at the end
            # of the file): skip it instead of failing with IndexError.
            continue
        key = row[0]
        val = row[1]
        posDict[key] = val.strip()

# populate the list of dictionaries
# Each usable line of dictionaries.db has two " : "-separated fields; the
# second field becomes the key and the first field becomes the value.
dictDict = {}
with open("dictionaries.db") as f:
    for line in f:
        row = line.split(" : ")
        if len(row) < 2:
            continue  # blank or separator-less line
        key = row[1].strip()
        val = row[0].strip()
        dictDict[key] = val

def human_readable_pos_tagged_tuple( tupe ): # a tuple
    """Swap the tag of a (word, tag) pair for its entry in posDict.

    Tags that have no entry in posDict are replaced by the marker "stet".
    Returns a new (word, description) tuple.
    """
    word, tag = tupe
    return ( word, posDict.get(tag, "stet") )

# Shared containers passed between the notebook's steps. The step functions
# empty and refill these same objects in place rather than rebinding the names.
pos_tagged_list = []
template = []
dictionary = dict()
filled_template = []

# set up UI elements
sourceTextInput = widgets.Textarea(value=defaultSourceText, description='Source text:')
sourceTextInput.layout.height = '130px'
sourceTextInput.layout.width = '100%'
madnessSlider = widgets.IntSlider(min=0, max=10, step=1, description='Madness:', value=3)
dictChooser = widgets.Dropdown(options=dictDict, description="Dictionary:")
tagButton = widgets.Button(description="Click to play Red Hat Libs")
tagButton.layout.width = '200px'
def run_all_below(ev):
    """Button callback: ask the notebook front end to execute the cells below.

    ev is the argument supplied by the button's click event; it is not used.
    """
    # Imported locally: elsewhere in the notebook Javascript is only imported
    # in a later cell, so relying on that import makes the callback fail with
    # NameError if the button is clicked before that cell has run.
    from IPython.display import Javascript
    # NOTE(review): the IPython.notebook JavaScript object is assumed to be
    # provided by the front end - confirm for front ends other than the
    # classic Notebook.
    display(Javascript('IPython.notebook.execute_cells_below()'))
tagButton.on_click(run_all_below)

## Play Red Hat Libs

In [None]:
from IPython.display import Markdown, Javascript

# Show the input controls in order, followed by the results heading.
for control in (sourceTextInput, madnessSlider, dictChooser, tagButton):
    display(control)
display(Markdown("**Results:**"))
# Keep the display handle so this placeholder output can be updated later.
resultsHandle = display(display_id=True)
resultsHandle.display(Markdown("_no results_"))

## Under the hood: Tag the parts of speech

In [None]:
def tag_parts_of_speech(sourceText=None):
    """Tokenize and tag sourceText, storing the result in pos_tagged_list.

    sourceText: the text to tag. When omitted, the current value of the
        source text box is read at call time (a default written as
        sourceTextInput.value in the signature would be frozen at the moment
        the function is defined).

    The shared pos_tagged_list is emptied and refilled in place with
    (word, description) tuples produced by human_readable_pos_tagged_tuple,
    then printed. Returns None.
    """
    if sourceText is None:
        sourceText = sourceTextInput.value

    tokens = nltk.word_tokenize(sourceText)
    tagged_tokens = nltk.pos_tag(tokens)

    pos_tagged_list.clear()
    pos_tagged_list.extend(human_readable_pos_tagged_tuple(pair) for pair in tagged_tokens)
    print(pos_tagged_list)

tag_parts_of_speech()

## Under the hood: Create blanks in the tagged text

The **Madness** setting controls how many blanks will be created.

In [None]:
import random

def create_blanks_from_pos_tagged_list(madness=None):
    """Pick which words of pos_tagged_list become blanks and store them in template.

    madness: blank density. Values outside 0..10 are clamped to that range.
        When omitted, the current slider value is read at call time.

    Every word whose part of speech is not "stet" is a candidate. A fraction
    madness/10 of the candidates (rounded down) is chosen at random. The
    shared template list is emptied and refilled in place with
    (word_index, part_of_speech) tuples sorted by word index, then printed.
    Returns None.
    """
    if madness is None:
        madness = madnessSlider.value

    candidates = [
        (position, tagged[1])
        for position, tagged in enumerate(pos_tagged_list)
        if tagged[1] != "stet"
    ]
    num_candidates = len(candidates)

    # error avoidance: keep the sample size between 0 and num_candidates
    madness = max(0, min(10, madness))
    num_desired_blanks = int(madness / 10 * num_candidates)
    print("Madness level", madness, "- creating", num_desired_blanks, "blanks from among", num_candidates, "candidates")

    blanks = []
    if madness > 0:
        blanks = random.sample(candidates, num_desired_blanks)
        blanks.sort(key=lambda tup: tup[0])

    template.clear()
    template.extend(blanks)
    print(template)

create_blanks_from_pos_tagged_list()

## Under the hood: Choose replacement words from the dictionary

The **Dictionary** dropdown controls which dictionary is used.

In [None]:
def load_dictionary(file_name=None):
    """Load a replacement-word dictionary file into the shared dictionary.

    file_name: path of the dictionary file. When omitted, the value currently
        selected in the dictionary dropdown is read at call time.

    Each entry line has the form "<part of speech> : word1, word2, ...".
    Lines reading <dictionary> or </dictionary>, blank lines and lines
    without the " : " separator are skipped. The shared dictionary is
    emptied and refilled in place, and one summary line per part of speech
    is printed. Returns None.
    """
    if file_name is None:
        file_name = dictChooser.value

    new_dict = {}
    with open(file_name) as f:
        for raw_line in f:
            # Strip first so the last word of a line does not keep its
            # newline and the marker lines match whatever the line ending is.
            line = raw_line.strip()
            if not line or line in ("<dictionary>", "</dictionary>"):
                continue
            row = line.split(" : ")
            if len(row) < 2:
                continue  # not an entry line
            new_dict[row[0]] = row[1].split(", ")
    dictionary.clear()
    dictionary.update(new_dict)
    for entry in dictionary:
        print("Dictionary" , file_name , "has" , len(dictionary[entry]) , "entries for", entry)

load_dictionary()

In [None]:
def choose_replacements_from_dictionary():
    """Choose a replacement word for every blank in template.

    For each part of speech requested by template, words are drawn at random
    from the shared dictionary. Words are distinct as long as the dictionary
    holds enough of them; if more blanks are requested than words exist, the
    remainder is drawn with repetition instead of raising ValueError.

    The shared filled_template list is emptied and refilled in place with one
    entry per blank, in template order, then printed:
      * (word_index, replacement_word, part_of_speech) when a word was found;
      * (word_index, part_of_speech) when the dictionary has no words for
        that part of speech, instead of raising KeyError.
    Returns None.
    """
    # How many words are needed per part of speech.
    pos_counts = {}
    for _, pos in template:
        pos_counts[pos] = pos_counts.get(pos, 0) + 1

    chosen_pos = {}
    for requested_pos, needed in pos_counts.items():
        words = dictionary.get(requested_pos, [])
        picks = random.sample(words, min(needed, len(words)))
        if words and len(picks) < needed:
            # Not enough distinct words: top up, allowing repeats.
            picks.extend(random.choices(words, k=needed - len(picks)))
        chosen_pos[requested_pos] = picks

    new_choices = []
    for word_index, pos in template:
        picks = chosen_pos[pos]
        if picks:
            new_choices.append( ( word_index, picks.pop(), pos ) )
        else:
            new_choices.append( ( word_index, pos ) )
    filled_template.clear()
    filled_template.extend(new_choices)
    print(filled_template)

choose_replacements_from_dictionary()

## Under the hood: Display the original text with the chosen substitutions

In [None]:
# Markup wrapped around a blank that has no replacement word (rendered underlined).
blankDelim = "<u>&nbsp;&nbsp;&nbsp;"
blankEndDelim="&nbsp;&nbsp;&nbsp;</u>"
def fill_pos_tagged_list_with_filled_template(tagged_words=None, replacements=None):
    """Build the result text by merging substitutions into the tagged words.

    tagged_words: sequence of tuples whose first item is the word. Defaults
        to the shared pos_tagged_list.
    replacements: sequence of tuples whose first item is the index of the
        word to replace and whose second item is the text to show instead.
        A 2-item tuple is treated as an unfilled blank and its text is
        wrapped in blankDelim/blankEndDelim; longer tuples are shown as-is.
        Entries may be in any order. Defaults to the shared filled_template.

    Returns the words joined with a single space after each one, or
    "_no results_" when there are no words.
    """
    if tagged_words is None:
        tagged_words = pos_tagged_list
    if replacements is None:
        replacements = filled_template

    # Index the substitutions by word position so lookup does not depend on
    # the entries being sorted.
    by_index = {entry[0]: entry for entry in replacements}

    pieces = []
    for word_index, tagged in enumerate(tagged_words):
        entry = by_index.get(word_index)
        if entry is None:
            pieces.append(tagged[0])
        elif len(entry) == 2: # it's an empty template, no filled words chosen
            pieces.append(blankDelim + entry[1] + blankEndDelim)
        else:
            pieces.append(entry[1])

    if not pieces:
        return "_no results_"
    return "".join(piece + " " for piece in pieces)

# Echo the inputs that were used for this run.
display(Markdown("### Inputs"))
display(Markdown("__Source text:__ " + sourceTextInput.value))
display(Markdown("__Madness:__ " + str(madnessSlider.value)))
display(Markdown("__Dictionary:__ " + dictChooser.label))
display(Markdown("### Results"))

# Build the final text, push it into the results placeholder created in the
# Play cell, and also show it in this cell's own output.
txt = fill_pos_tagged_list_with_filled_template()  
m = Markdown(txt)
resultsHandle.update(m)
display(m)
# Link back to the input controls.
display(Markdown("[Play again](#Play-Red-Hat-Libs)"))