In [1]:
# Read in the corpus, including punctuation!
import pandas as pd

data = pd.read_pickle('corpus.pkl')
data

Unnamed: 0,transcript
episode 1,"Original release date: November 23, 2022 Wedne..."
episode 2,"Original release date: November 23, 2022 Wedne..."
episode 3,"Original release date: November 23, 2022 Wedne..."
episode 4,"Original release date: November 23, 2022 Wedne..."
episode 5,"Original release date: November 23, 2022 32 ye..."
episode 6,"Original release date: November 23, 2022 Wedne..."
episode 7,"Original release date: November 23, 2022 At Ma..."
episode 8,"Original release date: November 23, 2022 Wedne..."


In [7]:
# Extract only Brad Williams's text
ep_5_text = data.transcript.loc['episode 5']
ep_5_text[:200]

'Original release date: November\xa023,\xa02022 32 years ago, Gomez is arrested on suspicion of killing Garrett Gates at Nevermore. At present time, the Addamses visit Wednesday for parents’ weekend at Never'

In [8]:
from collections import defaultdict

def markov_chain(text):
    '''The input is a string of text and the output will be a dictionary with each word as
       a key and each value as the list of words that come after the key in the text.'''
    
    # Tokenize the text by word, though including punctuation
    words = text.split(' ')
    
    # Initialize a default dictionary to hold all of the words and next words
    m_dict = defaultdict(list)
    
    # Create a zipped list of all of the word pairs and put them in word: list of next words format
    for current_word, next_word in zip(words[0:-1], words[1:]):
        m_dict[current_word].append(next_word)

    # Convert the default dict back into a dictionary
    m_dict = dict(m_dict)
    return m_dict

In [9]:
# Create the dictionary for Ali's routine, take a look at it
ep_5_dict = markov_chain(ep_5_text)
ep_5_dict

{'Original': ['release'],
 'release': ['date:', 'Gomez'],
 'date:': ['November\xa023,\xa02022'],
 'November\xa023,\xa02022': ['32'],
 '32': ['years', 'years'],
 'years': ['ago,', 'ago?', 'of', 'to', 'ago.'],
 'ago,': ['Gomez'],
 'Gomez': ['is', 'is', 'reveals', 'after', 'and', 'Addams.', 'kissing'],
 'is': ['arrested',
  'cut',
  'guilty',
  'on',
  'expected',
  'not',
  'odd.',
  'disappointing.',
  'an',
  'neither',
  'exactly',
  'over.',
  'Bianca.',
  'no',
  'drying',
  'gonna',
  'a',
  'about,',
  'exactly',
  'that',
  'being',
  'a',
  'desperately',
  'all',
  'trying',
  'all',
  'about',
  'the',
  'Mother',
  'a',
  'a',
  'that',
  'over.'],
 'arrested': ['on', 'for', 'for'],
 'on': ['suspicion',
  'car]',
  'her',
  'the',
  'the',
  'my',
  'this',
  'the',
  'that',
  'my',
  'him.',
  'a',
  'the',
  'the',
  'that?',
  'his',
  'the',
  'the',
  'was',
  'the',
  'my',
  'earth?'],
 'suspicion': ['of'],
 'of': ['killing',
  'shapeshifting',
  'affection.',
  'a',


In [10]:
import random

def generate_sentence(chain, count=15):
    '''Input a dictionary in the format of key = current word, value = list of next words
       along with the number of words you would like to see in your generated sentence.'''

    # Capitalize the first word
    word1 = random.choice(list(chain.keys()))
    sentence = word1.capitalize()

    # Generate the second word from the value list. Set the new word as the first word. Repeat.
    for i in range(count-1):
        word2 = random.choice(chain[word1])
        word1 = word2
        sentence += ' ' + word2

    # End it with a period
    sentence += '.'
    return(sentence)

In [11]:
generate_sentence(ep_5_dict)

'Stop! Leave him alone! I’ll just trying to distract her. [music swells] I don’t. Because.'

In [12]:
import random
from collections import defaultdict

def generate_sentence(chain, count=15):
    if not chain:
        return "Cannot generate a sentence from an empty dictionary"

    # Use defaultdict to handle missing keys gracefully
    chain = defaultdict(list, chain)

    # Choose the first word randomly
    word1 = random.choice(list(chain.keys()))

    # Capitalize the first word
    sentence = word1.capitalize()

    # Generate subsequent words until the sentence has the desired length
    for i in range(count-1):
        # Choose the next word randomly based on its frequency in the chain
        word2 = random.choices(chain[word1], weights=[chain[word1].count(w) for w in chain[word1]])[0]

        # Add the word to the sentence
        sentence += ' ' + word2

        # Update the current word
        word1 = word2

        # End the sentence with appropriate punctuation
        if i == count-2:
            sentence += random.choice(['.', '!', '?'])

    return sentence

In [14]:
episodes=[]
for i in range(8):
    episodes.append(f"episode {i+1}")

In [15]:
sen={}

In [16]:
for episode in episodes:
    ep_text = data.transcript.loc[episode]
    ep_text[:200]
    ep_dict = markov_chain(ep_text)
    sentence=generate_sentence(ep_5_dict)
    sen[episode]=sentence

In [17]:
sen

{'episode 1': 'Drunken stupor. It was so excited to start? Maybe we have all the reason he’s?',
 'episode 2': 'Turned into the little death by a murderer wandering around town scot-free. At present time,?',
 'episode 3': 'Bed. Why did not your fishing gear. Stop trying to be in the hospital. That?',
 'episode 4': 'Date: November\xa023,\xa02022 32 years ago? [screams] [music builds ominously, fades away] [Wednesday sighs] I made?',
 'episode 5': 'Guilt for the punch and he had broken into the results we’d hoped. I’m sorry?',
 'episode 6': '[intense, sweeping music playing] Ah, Nevermore. [Sue] He’s already got a distraction. No, this in?',
 'episode 7': 'Suit you. And hope that I thought they are you have a stab wound. [Walker?',
 'episode 8': 'Had been living with the truth about my son have to be right way he.'}

In [21]:
import pandas as pd

In [23]:
sentences=[sen[episode] for episode in episodes]
sentences

['Drunken stupor. It was so excited to start? Maybe we have all the reason he’s?',
 'Turned into the little death by a murderer wandering around town scot-free. At present time,?',
 'Bed. Why did not your fishing gear. Stop trying to be in the hospital. That?',
 'Date: November\xa023,\xa02022 32 years ago? [screams] [music builds ominously, fades away] [Wednesday sighs] I made?',
 'Guilt for the punch and he had broken into the results we’d hoped. I’m sorry?',
 '[intense, sweeping music playing] Ah, Nevermore. [Sue] He’s already got a distraction. No, this in?',
 'Suit you. And hope that I thought they are you have a stab wound. [Walker?',
 'Had been living with the truth about my son have to be right way he.']

In [37]:
dic={'episode':episodes,'sentences':sentences}

In [40]:
pd.set_option('display.max_colwidth', None)
df = pd.DataFrame(dic)
# Set 'Name' column as the index
df = df.set_index('episode')

In [41]:
print(df)

                                                                                                                sentences
episode                                                                                                                  
episode 1                                  Drunken stupor. It was so excited to start? Maybe we have all the reason he’s?
episode 2                   Turned into the little death by a murderer wandering around town scot-free. At present time,?
episode 3                                    Bed. Why did not your fishing gear. Stop trying to be in the hospital. That?
episode 4  Date: November 23, 2022 32 years ago? [screams] [music builds ominously, fades away] [Wednesday sighs] I made?
episode 5                                   Guilt for the punch and he had broken into the results we’d hoped. I’m sorry?
episode 6             [intense, sweeping music playing] Ah, Nevermore. [Sue] He’s already got a distraction. No, this in?
episode 7               