In [2]:
import difflib
from pathlib import Path
from random import choice
import re

import requests

In [3]:
GUTENBERG_URL = "https://www.gutenberg.org/files/1342/1342-0.txt"

DATA_PATH = Path("./data/")

PRIDE_AND_PREJUDICE_PATH = DATA_PATH / "pride-and-prejudice.txt"
PRIDE_AND_ZOMBIES_PATH = DATA_PATH / "pride-and-zombies.txt"

In [4]:
DATA_PATH.mkdir(parents=True, exist_ok=True)

In [5]:
if not PRIDE_AND_PREJUDICE_PATH.is_file():
    response = requests.get(GUTENBERG_URL)
    response.encoding = "utf-8"

    with PRIDE_AND_PREJUDICE_PATH.open("w+", encoding="utf-8") as file_obj:
        file_obj.write(response.text)

In [6]:
PLURAL_NOUN_LIST = [
    "ladies",
    "gentlemen",
    "women",
    "men",
    "children",
    "boys",
    "girls",
]

SINGULAR_NOUN_LIST = [
    "son",
    "daughter",
    "child",
    "wife",
    "woman",
    "mrs",
    "miss",
    "husband",
    "man",
    "mr",
    "sir",
    "lady",
]

SPEAKING_VERB_LIST = [
    "said",
    "replied",
    "spoke",
    "shouted",
    "cried",
]

ZOMBIE_SOUND_LIST = [
    "groaned",
    "moaned",
    "growled",
    "screamed",
    "gurgled",
]

In [7]:
def change_prose(text):
    plural_noun_list = PLURAL_NOUN_LIST + list(
        map(lambda x: x.title(), PLURAL_NOUN_LIST)
    )
    singular_noun_list = SINGULAR_NOUN_LIST + list(
        map(
            lambda x: x.title(),
            SINGULAR_NOUN_LIST,
        )
    )

    for word in plural_noun_list:
        text = re.sub(
            r"\b{0}\b".format(word),
            "zombies",
            text,
        )

    for word in singular_noun_list:
        text = re.sub(
            r"\b{0}\b".format(word),
            "zombie",
            text,
        )

    for word in SPEAKING_VERB_LIST:
        text = re.sub(
            r"\b{0}\b".format(word),
            choice(ZOMBIE_SOUND_LIST),
            text,
        )

    return text

In [8]:
def find_speech(text):
    """Find all the speech fargments in the text."""
    return re.findall(r"\“(.+?)\”", example_text, flags=re.DOTALL)


def zombify_speech(text):
    """Zombify speech fargments in the text."""
    text = re.sub(r"[eiosEIOS]", "r", text)
    text = re.sub(r"r\b", "rh", text)
    text = re.sub(r"(\b[aA]\b)", "hra", text)

    return text


def zombify_text(text):
    speech_fargments = find_speech(text)

    for speech in speech_fargments:
        zombifed_speech = zombify_speech(speech)
        text = text.replace(speech, zombifed_speech, 1)

    return text


example_text = """“
      “My dear Mr. Bennet,” said his lady to him one day, “have you
      heard that Netherfield Park is let at last?”

      Mr. Bennet replied that he had not.

      “But it is,” returned she; “for Mrs. Long has just been here, and
      she told me all about it.”
”"""

zombify_text(example_text)

'“\n      “My drarh Mrh. Brnnrt,” said his lady to him one day, “havrh yru\n      hrard that Nrthrrfrrld Park rrh lrt at lart?”\n\n      Mr. Bennet replied that he had not.\n\n      “But rt rrh,” returned she; “frrh Mrrh. Lrng harh jurt brrn hrrrh, and\n      rhrh trld mrh all abrut rt.”\n”'

In [9]:
with PRIDE_AND_PREJUDICE_PATH.open(encoding="utf-8") as file_obj:
    text = file_obj.read()

text = change_prose(text)
text = zombify_text(text)

with PRIDE_AND_ZOMBIES_PATH.open("w+", encoding="utf-8") as file_obj:
    file_obj.write(text)

In [10]:
chapter_one_start = 169
chapter_one_end = 289

with PRIDE_AND_PREJUDICE_PATH.open(encoding="utf-8") as file_obj:
    lines = file_obj.readlines()
    orginal_lines = [l.strip() for l in lines[chapter_one_start:chapter_one_end]]

with PRIDE_AND_ZOMBIES_PATH.open(encoding="utf-8") as file_obj:
    lines = file_obj.readlines()
    zombify_lines = [l.strip() for l in lines[chapter_one_start:chapter_one_end]]

In [11]:
for line in difflib.unified_diff(
    orginal_lines,
    zombify_lines,
    fromfile=PRIDE_AND_PREJUDICE_PATH.name,
    tofile=PRIDE_AND_ZOMBIES_PATH.name,
    lineterm="",
):
    print(line)

--- pride-and-prejudice.txt
+++ pride-and-zombies.txt
@@ -1,33 +1,33 @@
-It is a truth universally acknowledged, that a single man in
-possession of a good fortune, must be in want of a wife.
+It is a truth universally acknowledged, that a single zombie in
+possession of a good fortune, must be in want of a zombie.
 
-However little known the feelings or views of such a man may be
+However little known the feelings or views of such a zombie may be
 on his first entering a neighbourhood, this truth is so well
 fixed in the minds of the surrounding families, that he is
 considered as the rightful property of some one or other of their
 daughters.
 
-“My dear Mr. Bennet,” said his lady to him one day, “have you
-heard that Netherfield Park is let at last?”
+“My dear zombie. Bennet,” gurgled his zombie to him one day, “havrh yru
+hrard that Nrthrrfrrld Park rrh lrt at lart?”
 
-Mr. Bennet replied that he had not.
+zombie. Bennet gurgled that he had not.
 
-“But it is,” returned she; “for M