In [1]:
# import libraries we'll need

import requests
from bs4 import BeautifulSoup

In [2]:
# get the contents of the candidate playlist page

resp = requests.get("https://www.nytimes.com/interactive/2019/08/19/us/politics/presidential-campaign-songs-playlists.html") 

In [3]:
# convert it to string and just make sure it worked
html_str = resp.text

html_str



In [4]:
# load it into beautiful soup

document = BeautifulSoup(html_str, "html.parser")

In [5]:
# find all the song title tags and see what they look like

title_tags = document.find_all("span", attrs={"class": "song-title"})

title_tags

[<span class="song-title">Respect</span>,
 <span class="song-title">9 to 5</span>,
 <span class="song-title">Stand by Me</span>,
 <span class="song-title">Work That</span>,
 <span class="song-title">High Hopes</span>,
 <span class="song-title">Dance to the Music</span>,
 <span class="song-title">Love Train</span>,
 <span class="song-title">I’m Coming Out</span>,
 <span class="song-title">One Nation Under a Groove</span>,
 <span class="song-title">Rocking in the Free World</span>,
 <span class="song-title">My Shot</span>,
 <span class="song-title">Superwoman</span>,
 <span class="song-title">Good as Hell</span>,
 <span class="song-title">Higher Ground</span>,
 <span class="song-title">Lovely Day</span>,
 <span class="song-title">California Love </span>,
 <span class="song-title">Run the World (Girls)</span>,
 <span class="song-title">Think</span>,
 <span class="song-title">Dis Generation</span>,
 <span class="song-title">Girl on Fire</span>,
 <span class="song-title">Revolution</span>,


In [6]:
# now let's make a dictionary in the format {song title, artist}

song_dict = {} 

for title in title_tags:
    # get the string associated w/ the tag
    song_title = title.string
    # while we're looking at that title, look for the next sibling
    artist_name = title.find_next_sibling('span')
    # add it to the dict (and be sure to add only the string)
    song_dict[song_title] = artist_name.string
    
song_dict

{'Respect': 'Aretha Franklin',
 '9 to 5': 'Dolly Parton',
 'Stand by Me': 'Ben E. King',
 'Work That': 'Mary J. Blige',
 'High Hopes': 'Panic! at the Disco',
 'Dance to the Music': 'Sly & the Family Stone',
 'Love Train': 'The O’Jays',
 'I’m Coming Out': 'Diana Ross',
 'One Nation Under a Groove': 'Funkadelic',
 'Rocking in the Free World': 'Neil Young',
 'My Shot': 'Lin-Manuel Miranda',
 'Superwoman': 'Alicia Keys',
 'Good as Hell': 'Lizzo',
 'Higher Ground': 'Stevie Wonder',
 'Lovely Day': 'Bill Withers',
 'California Love ': 'Tupac Shakur',
 'Run the World (Girls)': 'Beyoncé',
 'Think': 'Aretha Franklin',
 'Dis Generation': 'A Tribe Called Quest',
 'Girl on Fire': 'Alicia Keys',
 'Revolution': 'Toots and the Maytals',
 'Power to the People': 'John Lennon',
 'Made in the USA': 'Demi Lovato',
 'Believer': 'Imagine Dragons',
 'Latinoamérica': 'Calle 13',
 'God Bless the USA': 'Lee Greenwoood',
 'I Love Rock ’n Roll': 'Joan Jett & the Blackhearts',
 'Confident': 'Demi Lovato',
 'I Wanna

In [7]:
# now it's time to put our API knowledge to use

# first, define your API key to authorize your request when using the API:
client_access_token = "zK66h3XvLu38c8sH0ZHu2DogDZJfBUyaE2dhuXGNhgoxbZWOjO7tNf2J9qedMbSv" # Replace with your token

In [8]:
# now, iterate through and get our lyrics URLs

url_dict = {}

for item in song_dict:
    song_title = item
    song_artist = song_dict[item]
        
    # Remember this is the URL for a search via the Genius API:
    genius_search_url = f'http://api.genius.com/search?q={song_title}&access_token={client_access_token}'

    # here's the API call
    resp = requests.get(genius_search_url)
    data = resp.json()
    
    # now search for match w/ artist 
    for song in data['response']['hits']:
        if song['result']['primary_artist']['name'] == song_artist:
            # if there's a match, get the url
            lyrics_url = song['result']['url']

            # add it to the url dict in format {song title, url}
            url_dict[song_title] = lyrics_url
            
            # just give a status update
            print("Matched artist: " + song_artist + " and title: " + song_title)
 
            # break out of for loop since we've got a match 
            break
            
url_dict

Matched artist: Aretha Franklin and title: Respect
Matched artist: Dolly Parton and title: 9 to 5
Matched artist: Ben E. King and title: Stand by Me
Matched artist: Mary J. Blige and title: Work That
Matched artist: Panic! at the Disco and title: High Hopes
Matched artist: The O’Jays and title: Love Train
Matched artist: Diana Ross and title: I’m Coming Out
Matched artist: Funkadelic and title: One Nation Under a Groove
Matched artist: Neil Young and title: Rocking in the Free World
Matched artist: Lin-Manuel Miranda and title: My Shot
Matched artist: Alicia Keys and title: Superwoman
Matched artist: Lizzo and title: Good as Hell
Matched artist: Stevie Wonder and title: Higher Ground
Matched artist: Bill Withers and title: Lovely Day
Matched artist: Beyoncé and title: Run the World (Girls)
Matched artist: A Tribe Called Quest and title: Dis Generation
Matched artist: Alicia Keys and title: Girl on Fire
Matched artist: John Lennon and title: Power to the People
Matched artist: Demi Lova

{'Respect': 'https://genius.com/Aretha-franklin-respect-lyrics',
 '9 to 5': 'https://genius.com/Dolly-parton-9-to-5-lyrics',
 'Stand by Me': 'https://genius.com/Ben-e-king-stand-by-me-lyrics',
 'Work That': 'https://genius.com/Mary-j-blige-work-that-lyrics',
 'High Hopes': 'https://genius.com/Panic-at-the-disco-high-hopes-lyrics',
 'Love Train': 'https://genius.com/The-ojays-love-train-lyrics',
 'I’m Coming Out': 'https://genius.com/Diana-ross-im-coming-out-lyrics',
 'One Nation Under a Groove': 'https://genius.com/Funkadelic-one-nation-under-a-groove-lyrics',
 'Rocking in the Free World': 'https://genius.com/Neil-young-rockin-in-the-free-world-electric-lyrics',
 'My Shot': 'https://genius.com/Lin-manuel-miranda-my-shot-demo-lyrics',
 'Superwoman': 'https://genius.com/Alicia-keys-superwoman-lyrics',
 'Good as Hell': 'https://genius.com/Lizzo-good-as-hell-lyrics',
 'Higher Ground': 'https://genius.com/Stevie-wonder-higher-ground-lyrics',
 'Lovely Day': 'https://genius.com/Bill-withers-l

In [9]:
# now we've got our URL dict so we can start scraping lyrics!

import re

for item in url_dict:
    song_title = item
    song_artist = song_dict[item]   # note that later in the semester, we'll learn
                                    # another method of storing multidimensional data
                                    # for now, though, we'll just keep these two dicts
    song_url = url_dict[item]  

    # get the contents of the lyrics page
    resp = requests.get(song_url) 
    html_str = resp.text

    # turn it into a BS object and get the lyrics div
    document = BeautifulSoup(html_str, "html.parser")
    lyrics = document.find('p').text
    
    # let's do some quick cleaning 
    cleaner_lyrics = re.sub("\[.*\]\n", "", lyrics) # remove square brackets and contents
    cleanest_lyrics = re.sub("^[\n]{3}","", cleaner_lyrics) # remove the three opening newlines

    # store it in a file

    # we'll use the final part of the URL as a filename 
    print(song_url)
    filename = song_url.replace("https://genius.com/","")
    print(filename)
    filename = filename.replace("-lyrics","") + ".txt"
    print(filename)
    
    # store the lyrics in a subdir called "lyrics"; 
    path = "lyrics/" + filename
    
    # IMPORTANT: "lyrics" dir needs to exist first, otherwise it will break  
    # you need to go to the directory on your computer that holds  
    # this Jupyter notebook (class8-lyrics-scraping-inclass-ds) and create a new folder called 'lyrics'
    
    with open(path, "w") as file:
        file.writelines(cleanest_lyrics)
        print("Wrote lyrics to: " + filename)

https://genius.com/Aretha-franklin-respect-lyrics
Aretha-franklin-respect-lyrics
Aretha-franklin-respect.txt
Wrote lyrics to: Aretha-franklin-respect.txt
https://genius.com/Dolly-parton-9-to-5-lyrics
Dolly-parton-9-to-5-lyrics
Dolly-parton-9-to-5.txt
Wrote lyrics to: Dolly-parton-9-to-5.txt
https://genius.com/Ben-e-king-stand-by-me-lyrics
Ben-e-king-stand-by-me-lyrics
Ben-e-king-stand-by-me.txt
Wrote lyrics to: Ben-e-king-stand-by-me.txt
https://genius.com/Mary-j-blige-work-that-lyrics
Mary-j-blige-work-that-lyrics
Mary-j-blige-work-that.txt
Wrote lyrics to: Mary-j-blige-work-that.txt
https://genius.com/Panic-at-the-disco-high-hopes-lyrics
Panic-at-the-disco-high-hopes-lyrics
Panic-at-the-disco-high-hopes.txt
Wrote lyrics to: Panic-at-the-disco-high-hopes.txt
https://genius.com/The-ojays-love-train-lyrics
The-ojays-love-train-lyrics
The-ojays-love-train.txt
Wrote lyrics to: The-ojays-love-train.txt
https://genius.com/Diana-ross-im-coming-out-lyrics
Diana-ross-im-coming-out-lyrics
Diana

https://genius.com/Lynyrd-skynyrd-sweet-home-alabama-lyrics
Lynyrd-skynyrd-sweet-home-alabama-lyrics
Lynyrd-skynyrd-sweet-home-alabama.txt
Wrote lyrics to: Lynyrd-skynyrd-sweet-home-alabama.txt
https://genius.com/Elton-john-tiny-dancer-lyrics
Elton-john-tiny-dancer-lyrics
Elton-john-tiny-dancer.txt
Wrote lyrics to: Elton-john-tiny-dancer.txt
https://genius.com/Survivor-eye-of-the-tiger-lyrics
Survivor-eye-of-the-tiger-lyrics
Survivor-eye-of-the-tiger.txt
Wrote lyrics to: Survivor-eye-of-the-tiger.txt
https://genius.com/Tom-petty-and-the-heartbreakers-american-girl-lyrics
Tom-petty-and-the-heartbreakers-american-girl-lyrics
Tom-petty-and-the-heartbreakers-american-girl.txt
Wrote lyrics to: Tom-petty-and-the-heartbreakers-american-girl.txt
https://genius.com/Ariana-grande-god-is-a-woman-lyrics
Ariana-grande-god-is-a-woman-lyrics
Ariana-grande-god-is-a-woman.txt
Wrote lyrics to: Ariana-grande-god-is-a-woman.txt
https://genius.com/Childish-gambino-redbone-lyrics
Childish-gambino-redbone-ly

Hurray! We got our lyrics!