In [116]:
# Determining indefinite articles, pluralization, singularization

from pattern.en import referenced, pluralize, singularize

print referenced("university")
print referenced("hour")
print pluralize("child")
print singularize("wolves")

# Lots of other stuff is possible. Check out the documentation:
# http://www.clips.ua.ac.be/pages/pattern-en

a university
an hour
children
wolf


In [117]:
# POS tagging and verb conjugation...
# Let's turn obituaries into bizarrely specific fortunes! (ala Sam Lavigne)

from pattern.en import conjugate, parsetree, FUTURE
from pattern.search import search, match
from pprint import pprint
import re

In [118]:
obitFile = open('obituary.txt', 'r')
obitText = obitFile.read().decode('utf-8').encode('ascii', 'ignore')
obitFile.close()

# regex to tokenize with spaces...
tokenCand = re.split(r"([^a-zA-Z0-9]{2,})|([^a-zA-Z0-9']+)", obitText)
tokens = filter(lambda x: x and x is not None, tokenCand)
print tokens


# Here's the POS tagset:
# http://www.clips.ua.ac.be/pages/mbsp-tags

s = parsetree(obitText)

# Navigating the parsetree...
for sentence in s:
    for chunk in sentence.chunks:
        print chunk.type, [(w.string, w.type) for w in chunk.words]

['S', '. ', 'Parker', ' ', 'Gilbert', ', ', 'a', ' ', 'former', ' ', 'chairman', ' ', 'of', ' ', 'Morgan', ' ', 'Stanley', ' ', 'who', ' ', 'led', ' ', 'its', ' ', 'public', ' ', 'stock', ' ', 'offering', ' ', 'in', ' ', '1986', ' ', 'and', ' ', 'organized', ' ', 'a', ' ', 'management', ' ', 'revolt', ' ', 'at', ' ', 'that', ' ', 'storied', ' ', 'investment', ' ', 'house', ' ', '20', ' ', 'years', ' ', 'later', ', ', 'died', ' ', 'on', ' ', 'Wednesday', ' ', 'in', ' ', 'Manhattan', '. ', 'He', ' ', 'was', ' ', '81', '.\n\n', 'His', ' ', 'son', ' ', 'Parker', ' ', 'said', ' ', 'that', ' ', 'Mr', '. ', 'Gilbert', ' ', 'had', ' ', 'been', ' ', 'treated', ' ', 'for', ' ', 'chronic', ' ', 'obstructive', ' ', 'pulmonary', ' ', 'disease', ' ', 'for', ' ', 'years', ' ', 'and', ' ', 'that', ' ', 'he', ' ', 'died', ' ', 'at', ' ', 'NewYork', '-', 'Presbyterian', ' ', 'Hospital', '.\n\n', 'S', '. ', 'Parker', ' ', 'Gilbert', ' ', 'was', ' ', 'a', ' ', 'stepson', ' ', 'and', ' ', 'a', ' ', 'godson

In [119]:
# First step is reconjugating all the verbs to future tense
# and replacing the old verbs in the obituary
m = search('VP', s)

i = 0
j = 0
while i < len(tokens) and j < len(m):
    if m[j].string.startswith(tokens[i]):
        newVerb = conjugate(m[j].string)
        newVerbTokens = newVerb.split()
        oldVerbLen = len(m[j].string.split())
        
        if newVerbTokens[0] in ["would", "could", "to", "was"]:
            tokens[i] = newVerbTokens[0]
        else:
            tokens[i] = "will %s" % newVerbTokens[0]
        # skip punctuation tokens
        l = 1
        for k in range(2, oldVerbLen*2)[::2]:
            tokens[i+k] = newVerbTokens[l]
            l += 1
        
        i += 1
        j += 1
    else:
        i += 1

newText = "".join(tokens)

newText = re.sub(r"\bhad\b", "have", newText)

# Update years

years = re.findall(r"\b[0-9]{4}\b", newText)

for year in years:
    newText.replace(year, str(int(year)+20))
    
# Change name and personal pronouns

newText = re.sub(r"\bshe\b|\bhe\b", "you", newText, flags=re.IGNORECASE)
newText = re.sub(r"\bher\b|\bhis\b", "your", newText, flags=re.IGNORECASE)

obitName = match('NP', s).string # Assuming that deceased's name will be first noun phrase!
obitNameTokens = obitName.split()

obitTitle = "(Mr.|Mrs.|Ms.) " + obitNameTokens[-1]

newText = re.sub(obitName, "you", newText)
newText = re.sub(obitTitle, "you", newText)

# capitalize sentences...

newText = list(newText)

newText[0] = newText[0].upper()

for i in range(2, len(newText)):
    if newText[i-1] == '\n' or newText[i-2] == '.':
        newText[i] = newText[i].upper()
        
newText = ''.join(newText)

print newText

You, a former chairman of Morgan Stanley who will lead its public stock offering in 1986 and will organize a management revolt at that storied investment house 20 years later, will die on Wednesday in Manhattan. You will be 81.

Your son Parker will say that you will have been treat for chronic obstructive pulmonary disease for years and that you will die at NewYork-Presbyterian Hospital.

You will be a stepson and a godson of Morgan Stanleys founders, and you will come of age on Wall Street when family name and pedigree will be often enough to land one will also play at the elite partnerships of the day, like Morgan Stanley and Goldman Sachs.

But you also played a role in will explode this cozy world when you will unite a will divide body of Morgan Stanley bankers behind the idea that the firm could grow and will prosper only if it will sell shares to the public.

That will move, and the subsequent pressures that the business will face as a public company, will lead to Morgan Stanley