# Song Lyric Generator

In [1]:
import requests
import time
from bs4 import BeautifulSoup

In [2]:
# The page below lists The Meters' songs. This cell collects the URL of every
# song page found in the artist's track list into a list called `links`.
links = []
resp = requests.get("http://www.songlyrics.com/the-meters-lyrics/", timeout=30)
resp.raise_for_status()  # stop here rather than parse an HTTP error page
soup = BeautifulSoup(resp.content, "html.parser")
table = soup.find("table", {"class": "tracklist"})
if table is None:
    raise RuntimeError("Could not find the 'tracklist' table on the artist page")
for item in table.find_all("tr"):
    anchor = item.find("a")
    # Rows without a link (or with an anchor lacking href) are skipped
    # instead of raising a TypeError/KeyError.
    if anchor is not None and anchor.get("href"):
        links.append(anchor["href"])
    

In [3]:
# Visit each song link and collect the text of its lyrics in a list called
# `lyrics`. (`time` is imported in the first cell of the notebook.)

lyrics = []

for link in links:
    resp = requests.get(link, timeout=30)
    if resp.ok:
        soup = BeautifulSoup(resp.content, "html.parser")
        lyrics_tag = soup.find("p", {"id": "songLyricsDiv"})
        # A page without the lyrics paragraph is skipped instead of raising
        # AttributeError on `None.text` and aborting the whole scrape.
        if lyrics_tag is not None:
            lyrics.append(lyrics_tag.text)
    # Short pause between consecutive requests.
    time.sleep(0.5)

In [5]:
# Cleaning: lower-case every song, turn line breaks into <N> tags, strip a few
# punctuation marks and wrap the text in <START> / <END> tags.
PUNCTUATION_SWAPS = (("?", " "), (",", ""), ("-", " "), ("'", ""))

for idx, raw_song in enumerate(lyrics):
    text = raw_song.lower().replace("\n\n", "\n").replace("\n", " <N> ")
    for old, new in PUNCTUATION_SWAPS:
        text = text.replace(old, new)
    lyrics[idx] = "<START> " + text + " <END>"

# Tokenising: split every cleaned song on whitespace, keeping the same list object.
lyrics[:] = [text.split() for text in lyrics]

In [6]:
# Persist the tokenised lyrics to disk with pickle.
import pickle

# The context manager closes the file even if pickling fails.
with open("lyrics.pkl", "wb") as pickle_file:
    pickle.dump(lyrics, pickle_file)

In [7]:
#Now we will build a Markov chain: for every word in The Meters' songs we keep a list of all the words
#that immediately followed that word in the lyrics.
def train_markov_chain(lyrics):
    """
    Build a unigram Markov chain from a collection of songs.

    Args:
      - lyrics: a list of songs, where each song is a sequence of
                tokens (for example a list of words) in the order
                they occur in the song.

    Returns:
      A dict that maps a single token ("unigram") to the list of
      tokens that directly follow it anywhere in the songs.
      Repeated followers are kept, so common transitions appear
      several times. The key "<START>" is always present; the
      final token of a song gets no entry of its own unless it
      also occurs earlier in some song.
    """
    chain = {"<START>": []}
    for lyric in lyrics:
        # Pair every token with its successor; the last token has none.
        for current, following in zip(lyric, lyric[1:]):
            chain.setdefault(current, []).append(following)
    return chain

In [8]:
# Load the pickled lyrics object.
import pickle

# NOTE: unpickling can execute arbitrary code, so only load a file that this
# notebook wrote itself. The context manager makes sure the file is closed.
with open("lyrics.pkl", "rb") as pickle_file:
    lyrics = pickle.load(pickle_file)

# Train the Markov chain with the function defined above.
chain = train_markov_chain(lyrics)

# What words tend to start a song (i.e., what words follow the <START> tag)?
print(chain["<START>"])

# What words tend to begin a line (i.e., what words follow the line break tag)?
print(chain["<N>"][:20])

['we', 'gettin', 'little', 'loving', 'im', 'skin', 'we', 'wouldf', '12:', 'fire', 'bacon', 'powers', 'listen', 'hang', 'got', 'middle', 'es', 'es', '12:', 'wouldf', 'wouldf', 'ah', 'fire', 'powers', 'well', 'clap', 'how', 'the', 'funkify', 'be', 'here', 'little', 'fire', '12:', 'es', 'got', 'im', 'i', 'little', 'powers', 'wouldf', 'i', 'me', 'little', 'little', 'ive', 'i', 'es', 'ah', 'just', 'streets', 'clap', 'if', 'ride', 'streets', 'badlees', 'everclear', 'badlees', 'cause', 'just']
['gettin', 'funky', 'is', 'if', 'keep', 'keep', 'yeah', 'keep', 'keep', 'i', 'here', 'hooaaaaayeaah', 'gettin', 'gettin', 'funky', 'its', 'you', 'feel', 'good', 'gonna']


In [9]:
#Using the Markov chain we made above, we will essentially create a path through the song. We will randomly select
#a word that typically starts the song, then randomly select a word that follows that word, and randomly select
#a word that follows that word, and so on until we reach the end.
import random

def generate_new_lyrics(chain):
    """
    Generate a random song by walking a unigram Markov chain.

    Args:
      - chain: a dict representing the Markov chain, such as one
               generated by train_markov_chain(). It maps a word to
               the list of words that may follow it and is entered
               through the "<START>" key.

    Returns:
      A string representing the randomly generated song. Every
      "<N>" token becomes a line break, the words of a line are
      separated by single spaces, and the "<START>" / "<END>" tags
      are not part of the result. An empty string is returned when
      the chain has no words following "<START>".
    """
    starters = chain.get("<START>")
    if not starters:
        # Nothing to start from (for example a chain trained on no songs).
        return ""

    # Each inner list collects the words of one output line.
    lines = [[]]
    word = random.choice(starters)
    while word != "<END>":
        if word == "<N>":
            lines.append([])
        else:
            lines[-1].append(word)
        followers = chain.get(word)
        if not followers:
            # Dead end: no recorded successor, so finish the song here
            # instead of raising KeyError.
            break
        word = random.choice(followers)

    # Joining line by line avoids the stray spaces that appear around each
    # line break when the whole song is joined first and then split on "<N>".
    return "\n".join(" ".join(line) for line in lines)

In [10]:
# Sample one song from the chain trained above and display it.
unigram_song = generate_new_lyrics(chain)
print(unigram_song)

ride 
 the bayou 
 just kissed my home start an day all the rich are free 
 (ad lib with a bottle of steel 
 surely would come so i wennon up some good music 
 people get it on strut just set that theres no sense. 
 makes no one spinal cracker 
 the middle of gin soaked bar and its a 5th of gin 
 the cab to dance gossa do your shoes 
 countin you gonna light the bayou 
 feel good music watch me 
 just kissed by my sweetness my plans 
 huh 
 just got to try dragging my share is for trouble 
 that shes looking cause i saw you gettin down your heart is such a bottle of the road is gonna get along yeah 
 cause i was cool wine 
 the curb 
 hey hey pocky a north bound train 
 (what the river cant you feel the road 
 the way 
 the honky tonk women 
 lets get on fire 
 with this town (lynott) 
 well this in college 
 when i just kissed my favorite song i just keep on getting richer 
 bomp!: buh deeba doomp beemp bomp: bomp! bomp!: buh deeba doomp beemp bomp: hey 
 hey 
 deep in the road 
 what

In [11]:
# We will now train another Markov chain using bigrams. Essentially the same logic, except we are finding
# the words that follow every pair of words instead of a single word.
# Each song is padded with None at both ends so that its first and last tokens
# also take part in a pair.
for song in lyrics:
    # Tokens produced by str.split() are never None, so a None at both ends
    # means this cell already ran; skipping keeps re-runs from stacking padding.
    already_padded = len(song) >= 2 and song[0] is None and song[-1] is None
    if not already_padded:
        song.insert(0, None)
        song.append(None)

In [12]:
def train_markov_chain(lyrics):
    """
    Build a bigram Markov chain from a collection of songs.

    Args:
      - lyrics: a list of songs, where each song is a sequence of
                tokens (for example a list of words) in the order
                they occur in the song.

    Returns:
      A dict that maps a tuple of 2 consecutive tokens ("bigram")
      to the list of tokens that directly follow that bigram
      anywhere in the songs. Repeated followers are kept. Songs
      with fewer than three tokens contribute nothing.
    """
    chain = {}
    for lyric in lyrics:
        # Slide a window of three tokens: the first two form the key,
        # the third is the observed follower.
        for first, second, following in zip(lyric, lyric[1:], lyric[2:]):
            chain.setdefault((first, second), []).append(following)
    return chain

In [13]:
# Train the chain with the function defined above.
chain = train_markov_chain(lyrics)

# Which words tend to open a song, i.e. which words follow the
# (None, "<START>") pair?
song_openers = chain[(None, "<START>")]
print(song_openers)

['we', 'gettin', 'little', 'loving', 'im', 'skin', 'we', 'wouldf', '12:', 'fire', 'bacon', 'powers', 'listen', 'hang', 'got', 'middle', 'es', 'es', '12:', 'wouldf', 'wouldf', 'ah', 'fire', 'powers', 'well', 'clap', 'how', 'the', 'funkify', 'be', 'here', 'little', 'fire', '12:', 'es', 'got', 'im', 'i', 'little', 'powers', 'wouldf', 'i', 'me', 'little', 'little', 'ive', 'i', 'es', 'ah', 'just', 'streets', 'clap', 'if', 'ride', 'streets', 'badlees', 'everclear', 'badlees', 'cause', 'just']


In [14]:
import random

def generate_new_lyrics(chain):
    """
    Generate a random song by walking a bigram Markov chain.

    Args:
      - chain: a dict representing the Markov chain, such as one
               generated by train_markov_chain(). It maps a tuple
               of 2 words to the list of words that may follow that
               pair and is entered through the (None, "<START>") key.

    Returns:
      A string representing the randomly generated song. Every
      "<N>" token becomes a line break, the words of a line are
      separated by single spaces, and the "<START>" / "<END>" tags
      are not part of the result. An empty string is returned when
      the chain has no entry for (None, "<START>").
    """
    # The current pair of words; the walk always begins at the song start.
    state = (None, "<START>")
    # Each inner list collects the words of one output line.
    lines = [[]]
    while True:
        followers = chain.get(state)
        if not followers:
            # Unknown pair: finish the song here instead of raising KeyError.
            break
        # Exactly one word is sampled per state. Sampling the opening word
        # both before the loop and again inside it would put a second,
        # unrelated opener at the beginning of every song.
        word = random.choice(followers)
        if word is None or word == "<END>":
            break
        if word == "<N>":
            lines.append([])
        else:
            lines[-1].append(word)
        state = (state[1], word)

    # Joining line by line avoids the stray spaces that appear around each
    # line break when the whole song is joined first and then split on "<N>".
    return "\n".join(" ".join(line) for line in lines)

In [15]:
# A Meters song sampled from the bigram chain. Do you think it's better?
bigram_song = generate_new_lyrics(chain)
print(bigram_song)

12: ah hey bra what kinda music you got mad; 
 got to be 
 right about now lord have mercy 
 ugah! get down and low. 
 a little softer; people get louder. 
 we had a fight then you got 
 funky music watch yourself here we go 
 we had a fight then you got the word 
 im a live wire and im gonna set this town on fire
