# Text Generation , Markov Chain

In [1]:
# Read in the corpus, including punctuation!
import pandas as pd

data = pd.read_pickle('corpus.pkl')
data

Unnamed: 0,Lyrics
327,"[Chorus: Billie Essco & Westside Gunn]\nLook, ..."
Block Party,[Intro: CJ Fly]\nStatik Selektah\nEvery time I...
Dangerookipawaa freestyle,"[Part I]\n\n[Intro]\nYeah\nYeah\nYeah, ayy, ma..."
I like it,"[Intro]\nYeah baby, I like it like that\nYou g..."
Leader of Delinquents,"[Intro]\nMm (Yeah), mm, mm\n(This is not a tes..."
Old town road,"[Intro: Billy Ray Cyrus]\nOh, oh-oh\nOh\n\n[Re..."
Shape of you,[Verse 1]\nThe club isn't the best place to fi...


In [4]:
# Extract only I like it song lyrics
cardi_text = data.Lyrics.loc['I like it']
cardi_text[:300]

"[Intro]\nYeah baby, I like it like that\nYou gotta believe me when I tell you\nI said I like it like that\nYou gotta believe me when I tell you\nI said I like it like—\n\n[Verse 1: Cardi B]\nNow I like dollars, I like diamonds, I like stunting, I like shining\nI like million dollar deals, where's my pen? Bit"

# Build Markov Chain Function

In [5]:
from collections import defaultdict

def markov_chain(text):
    '''The input is a string of text and the output will be a dictionary with each word as
       a key and each value as the list of words that come after the key in the text.'''
    
    # Tokenize the text by word, though including punctuation
    words = text.split(' ')
    
    # Initialize a default dictionary to hold all of the words and next words
    m_dict = defaultdict(list)
    
    # Create a zipped list of all of the word pairs and put them in word: list of next words format
    for current_word, next_word in zip(words[0:-1], words[1:]):
        m_dict[current_word].append(next_word)

    # Convert the default dict back into a dictionary
    m_dict = dict(m_dict)
    return m_dict

In [6]:
# Create the dictionary for I like it, take a look at it
cardi_dict = markov_chain(cardi_text)
cardi_dict

{'[Intro]\nYeah': ['baby,'],
 'baby,': ['I', 'be,'],
 'I': ['like',
  'tell',
  'like',
  'tell',
  'like',
  'like',
  'like',
  'like',
  'like',
  'put',
  'do',
  "can't\nThey",
  'can\nEating',
  'run',
  'like',
  'like',
  'like',
  'like',
  'need',
  'like',
  'like',
  'like',
  'like',
  'got',
  'like',
  'got',
  'like',
  'like',
  'like',
  'like',
  'need',
  'like',
  'like',
  'like',
  'like',
  'got',
  'like',
  'like',
  'like',
  'like',
  'like',
  'like',
  'like',
  'like'],
 'like': ['it',
  'that\nYou',
  'it',
  'that\nYou',
  'it',
  'dollars,',
  'diamonds,',
  'stunting,',
  'shining\nI',
  'million',
  'those',
  'socks\nI',
  'going',
  'texts',
  'proving',
  'Mario',
  'cardio\nOh,',
  'it',
  'that)\nCertified,',
  'it',
  'it',
  'that)\nOh',
  'it)\nOh',
  'it',
  'that)\nBeat',
  'piñatas',
  'it',
  'it',
  'that)\nBad',
  'it)\nCardi',
  'Eddie,',
  'Boricuas,',
  'Cubans',
  'the',
  'how',
  'it',
  'that)\nBad',
  'it',
  'it',
  'that)\nOh,

# Text Generator

In [7]:
import random

def generate_sentence(chain, count=15):
    '''Input a dictionary in the format of key = current word, value = list of next words
       along with the number of words you would like to see in your generated sentence.'''

    # Capitalize the first word
    word1 = random.choice(list(chain.keys()))
    sentence = word1.capitalize()

    # Generate the second word from the value list. Set the new word as the first word. Repeat.
    for i in range(count-1):
        word2 = random.choice(chain[word1])
        word1 = word2
        sentence += ' ' + word2

    # End it with a period
    sentence += '.'
    return(sentence)

In [12]:
generate_sentence(cardi_dict)

'In the riot begins\n\n[Chorus: Cardi Bardi, banging body\nSpicy mami, hot tamale\nHotter than a Somali, fur.'