## Scraping for Data

In [28]:
# Make HTTP requests
import requests
# Scrape data from an HTML document
from bs4 import BeautifulSoup
# I/O
import os
# Search and manipulate strings
import re

import pandas as pd

import time

from secrets import GENIUS_API_TOKEN

In [29]:
# Get artist object from Genius API
def request_artist_info(artist_name, page):
    """Fetch one page of Genius search results for ``artist_name``.

    Args:
        artist_name: Text sent as the ``q`` search parameter.
        page: Page number of the search results to request.

    Returns:
        The ``requests.Response`` returned by the search call. The HTTP
        status is not checked here; callers decide how to handle errors.
    """
    base_url = 'https://api.genius.com'
    headers = {'Authorization': 'Bearer ' + GENIUS_API_TOKEN}
    search_url = base_url + '/search'
    # Send every search argument as a URL query parameter so that requests
    # handles the encoding (e.g. the '+' in 'Dan + Shay'); a GET request
    # should not carry its arguments in the request body.
    params = {'q': artist_name, 'per_page': 10, 'page': page}
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(search_url, params=params, headers=headers, timeout=10)
    return response
# Get Genius.com song URLs from the artist's search results
def request_song_url(artist_name, song_cap):
    """Collect up to ``song_cap`` Genius song URLs for an artist.

    Pages through the search results from ``request_artist_info`` and keeps
    the URL of every hit whose primary artist name contains ``artist_name``
    (case-insensitive substring match).

    Args:
        artist_name: Artist to search for.
        song_cap: Maximum number of URLs to return.

    Returns:
        A list of song URLs. It may be shorter than ``song_cap`` when the
        search results run out first.

    Raises:
        requests.HTTPError: If the search request returns an error status.
    """
    page = 1
    songs = []
    wanted = artist_name.lower()

    while len(songs) < song_cap:
        response = request_artist_info(artist_name, page)
        # Fail with the real HTTP error instead of a KeyError on the payload.
        response.raise_for_status()
        hits = response.json()['response']['hits']
        if not hits:
            # No more results: stop instead of requesting empty pages forever
            # when the artist has fewer than song_cap matching songs.
            break

        for hit in hits:
            result = hit['result']
            if wanted in result['primary_artist']['name'].lower():
                songs.append(result['url'])
                if len(songs) == song_cap:
                    break

        page += 1

    print('Found {} songs by {}'.format(len(songs), artist_name))
    return songs

In [30]:
# Scrape lyrics from a Genius.com song URL
def scrape_song_lyrics(url):
    """Download a Genius song page and return its cleaned lyrics text.

    The text is taken from the first ``<div class="lyrics">`` element. Any
    single-line span enclosed in parentheses or square brackets (for example
    ``[Chorus]``) is removed, then empty lines are dropped.

    Args:
        url: Address of the song page to fetch.

    Returns:
        The lyrics as one string, with lines joined by ``os.linesep``.

    Raises:
        ValueError: If the page has no ``<div class="lyrics">`` element.
    """
    # A timeout keeps one stalled page from blocking a whole scraping run.
    page = requests.get(url, timeout=10)
    html = BeautifulSoup(page.text, 'html.parser')
    container = html.find('div', class_='lyrics')
    if container is None:
        # find() returns None when nothing matches; name the offending URL
        # rather than failing with an AttributeError on None.
        raise ValueError(f'No <div class="lyrics"> element found at {url}')
    lyrics = container.get_text()
    # Remove identifiers like chorus, verse, etc.
    lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)
    # Remove empty lines (whitespace-only lines are kept).
    lyrics = os.linesep.join([s for s in lyrics.splitlines() if s])
    return lyrics

In [31]:
def write_lyrics_to_orig(artist_name, song_count, delay=1.5):
    """Scrape an artist's songs and return them as a DataFrame.

    Args:
        artist_name: Artist to search for; also stored in the ``artist`` column.
        song_count: Maximum number of songs to collect.
        delay: Seconds to sleep after each page is scraped, to space out
            the requests.

    Returns:
        A DataFrame with columns ``lyrics``, ``url`` and ``artist``, one row
        per scraped song.
    """
    urls = request_song_url(artist_name, song_count)
    lyrics_list = []
    for url in urls:
        lyrics_list.append(scrape_song_lyrics(url))
        time.sleep(delay)
    print(f'{len(lyrics_list)} {artist_name} songs written')
    df = pd.DataFrame({'lyrics': lyrics_list, 'url': urls}, columns=['lyrics', 'url'])
    df['artist'] = artist_name
    return df.reset_index(drop=True)

In [1]:
def write_lyrics_to_file(artist_name, song_count, path='../data/country_lyrics.csv'):
    """Scrape an artist's songs and append them to a CSV file.

    Args:
        artist_name: Artist to search for; also stored in the ``artist`` column.
        song_count: Maximum number of songs to collect.
        path: CSV file to append to.

    Returns:
        None. Rows are appended with columns ``lyrics``, ``url``, ``artist``.
    """
    # Reuse the scraping routine instead of keeping a second copy of it.
    df = write_lyrics_to_orig(artist_name, song_count)
    # Only write the header when starting a new (or empty) file, so appended
    # batches never repeat it and a fresh file is never left without one.
    write_header = not os.path.exists(path) or os.path.getsize(path) == 0
    df.to_csv(path, mode='a', header=write_header, index=False)

#### Scraping data for as many country artists as I can think of.

In [6]:
luke_combs = write_lyrics_to_orig('Luke Combs', 60)

Found 60 songs by Luke Combs
60 Luke Combs songs written


In [None]:
# Create the CSV that the write_lyrics_to_file calls below append to. No `mode`
# or `header` is passed, so pandas' defaults apply: the file is (re)written
# from scratch and starts with a header row.
luke_combs.to_csv('../data/country_lyrics.csv', index=False)

In [8]:
write_lyrics_to_file('Chris Stapleton', 46)

Found 46 songs by Chris Stapleton
46 Chris Stapleton songs written


In [9]:
write_lyrics_to_file('Lee Brice', 74)

Found 74 songs by Lee Brice
74 Lee Brice songs written


In [10]:
write_lyrics_to_file('Thomas Rhett', 75)

Found 75 songs by Thomas Rhett
75 Thomas Rhett songs written


In [11]:
write_lyrics_to_file('Carly Pearce', 33)

Found 33 songs by Carly Pearce
33 Carly Pearce songs written


In [12]:
write_lyrics_to_file('Eric Church', 75)

Found 75 songs by Eric Church
75 Eric Church songs written


In [13]:
write_lyrics_to_file('Dierks Bentley', 100)

Found 100 songs by Dierks Bentley
100 Dierks Bentley songs written


In [14]:
write_lyrics_to_file('Luke Bryan', 100)

Found 100 songs by Luke Bryan
100 Luke Bryan songs written


In [15]:
write_lyrics_to_file('Florida Georgia Line', 100)

Found 100 songs by Florida Georgia Line
100 Florida Georgia Line songs written


In [16]:
write_lyrics_to_file('Jordan Davis', 20)

Found 20 songs by Jordan Davis
20 Jordan Davis songs written


In [17]:
write_lyrics_to_file('Blake Shelton', 150)

Found 150 songs by Blake Shelton
150 Blake Shelton songs written


In [18]:
write_lyrics_to_file('Brett Young', 50)

Found 50 songs by Brett Young
50 Brett Young songs written


In [19]:
write_lyrics_to_file('Dan + Shay', 30)

Found 30 songs by Dan + Shay
30 Dan + Shay songs written


In [20]:
write_lyrics_to_file('Mitchell Tenpenny', 25)

Found 25 songs by Mitchell Tenpenny
25 Mitchell Tenpenny songs written


In [21]:
write_lyrics_to_file('Morgan Wallen', 50)

Found 50 songs by Morgan Wallen
50 Morgan Wallen songs written


In [22]:
write_lyrics_to_file('Jon Pardi', 40)

Found 40 songs by Jon Pardi
40 Jon Pardi songs written


In [23]:
write_lyrics_to_file('Kacey Musgraves', 50)

Found 50 songs by Kacey Musgraves
50 Kacey Musgraves songs written


In [24]:
write_lyrics_to_file('Jason Aldean', 75)

Found 75 songs by Jason Aldean
75 Jason Aldean songs written


In [25]:
write_lyrics_to_file('Kane Brown', 50)

Found 50 songs by Kane Brown
50 Kane Brown songs written


In [26]:
write_lyrics_to_file('Maren Morris', 30)

Found 30 songs by Maren Morris
30 Maren Morris songs written


In [27]:
write_lyrics_to_file('Sam Hunt', 37)

Found 37 songs by Sam Hunt
37 Sam Hunt songs written


In [28]:
write_lyrics_to_file('Old Dominion', 35)

Found 35 songs by Old Dominion
35 Old Dominion songs written


In [29]:
write_lyrics_to_file('Shania Twain', 100)

Found 100 songs by Shania Twain
100 Shania Twain songs written


In [30]:
write_lyrics_to_file('Carrie Underwood', 85)

Found 85 songs by Carrie Underwood
85 Carrie Underwood songs written


In [31]:
write_lyrics_to_file('The Chicks', 75)

Found 75 songs by The Chicks
75 The Chicks songs written


In [32]:
write_lyrics_to_file('Tim McGraw', 100)

Found 100 songs by Tim McGraw
100 Tim McGraw songs written


In [33]:
write_lyrics_to_file('Keith Urban', 100)

Found 100 songs by Keith Urban
100 Keith Urban songs written


In [34]:
write_lyrics_to_file('Miranda Lambert', 80)

Found 80 songs by Miranda Lambert
80 Miranda Lambert songs written


In [35]:
write_lyrics_to_file('Garth Brooks', 100)

Found 100 songs by Garth Brooks
100 Garth Brooks songs written


In [36]:
write_lyrics_to_file('Johnny Cash', 100)

Found 100 songs by Johnny Cash
100 Johnny Cash songs written


In [37]:
write_lyrics_to_file('Faith Hill', 100)

Found 100 songs by Faith Hill
100 Faith Hill songs written


In [38]:
write_lyrics_to_file('Dolly Parton', 150)

Found 150 songs by Dolly Parton
150 Dolly Parton songs written


In [39]:
write_lyrics_to_file('Brad Paisley', 100)

Found 100 songs by Brad Paisley
100 Brad Paisley songs written


In [40]:
write_lyrics_to_file('Taylor Swift', 200)

Found 200 songs by Taylor Swift
200 Taylor Swift songs written


In [41]:
write_lyrics_to_file('Rascal Flatts', 100)

Found 100 songs by Rascal Flatts
100 Rascal Flatts songs written


In [42]:
write_lyrics_to_file('Logan Mize', 30)

Found 30 songs by Logan Mize
30 Logan Mize songs written


In [43]:
write_lyrics_to_file('Chris Lane', 25)

Found 25 songs by Chris Lane
25 Chris Lane songs written


In [44]:
write_lyrics_to_file('Jimmie Allen', 18)

Found 18 songs by Jimmie Allen
18 Jimmie Allen songs written


In [45]:
write_lyrics_to_file('Filmore', 15)

Found 15 songs by Filmore
15 Filmore songs written


In [46]:
write_lyrics_to_file('Billy Ray Cyrus', 100)

Found 100 songs by Billy Ray Cyrus
100 Billy Ray Cyrus songs written


In [47]:
write_lyrics_to_file('Cole Swindell', 30)

Found 30 songs by Cole Swindell
30 Cole Swindell songs written


In [48]:
write_lyrics_to_file('The Band Perry', 25)

Found 25 songs by The Band Perry
25 The Band Perry songs written


In [49]:
write_lyrics_to_file('Lady A', 50)

Found 50 songs by Lady A
50 Lady A songs written


In [50]:
write_lyrics_to_file('George Strait', 200)

Found 200 songs by George Strait
200 George Strait songs written


In [51]:
write_lyrics_to_file('Ashley McBryde', 25)

Found 25 songs by Ashley McBryde
25 Ashley McBryde songs written


In [52]:
write_lyrics_to_file('Little Big Town', 65)

Found 65 songs by Little Big Town
65 Little Big Town songs written


In [53]:
write_lyrics_to_file('Sugarland', 65)

Found 65 songs by Sugarland
65 Sugarland songs written


In [54]:
write_lyrics_to_file('Willie Nelson', 200)

Found 200 songs by Willie Nelson
200 Willie Nelson songs written


In [55]:
write_lyrics_to_file('Kenny Chesney', 200)

Found 200 songs by Kenny Chesney
200 Kenny Chesney songs written


In [56]:
write_lyrics_to_file('Alan Jackson', 200)

Found 200 songs by Alan Jackson
200 Alan Jackson songs written


In [57]:
write_lyrics_to_file('Joe Nichols', 100)

Found 100 songs by Joe Nichols
100 Joe Nichols songs written


In [58]:
write_lyrics_to_file('Toby Keith', 100)

Found 100 songs by Toby Keith
100 Toby Keith songs written


In [59]:
write_lyrics_to_file('Patty Loveless', 100)

Found 100 songs by Patty Loveless
100 Patty Loveless songs written


In [60]:
write_lyrics_to_file('Vince Gill', 100)

Found 100 songs by Vince Gill
100 Vince Gill songs written


In [61]:
write_lyrics_to_file('Reba McEntire', 200)

Found 200 songs by Reba McEntire
200 Reba McEntire songs written


In [62]:
write_lyrics_to_file('Clint Black', 100)

Found 100 songs by Clint Black
100 Clint Black songs written


In [63]:
write_lyrics_to_file('Gabby Barrett', 30)

Found 30 songs by Gabby Barrett
30 Gabby Barrett songs written


In [64]:
write_lyrics_to_file('Tyler Childers', 30)

Found 30 songs by Tyler Childers
30 Tyler Childers songs written


In [65]:
write_lyrics_to_file('HARDY', 60)

Found 60 songs by HARDY
60 HARDY songs written


In [69]:
write_lyrics_to_file('Russell Dickerson', 25)

Found 25 songs by Russell Dickerson
25 Russell Dickerson songs written


In [70]:
write_lyrics_to_file('Midland', 100)

Found 100 songs by Midland
100 Midland songs written


In [71]:
write_lyrics_to_file('Rodney Atkins', 60)

Found 60 songs by Rodney Atkins
60 Rodney Atkins songs written


In [73]:
write_lyrics_to_file('Billy Currington', 60)

Found 60 songs by Billy Currington
60 Billy Currington songs written


In [74]:
write_lyrics_to_file('Scotty McCreery', 60)

Found 60 songs by Scotty McCreery
60 Scotty McCreery songs written


In [33]:
write_lyrics_to_file('Chase Rice', 60)

Found 60 songs by Chase Rice
60 Chase Rice songs written


In [34]:
write_lyrics_to_file('Dustin Lynch', 50)

Found 50 songs by Dustin Lynch
50 Dustin Lynch songs written


In [35]:
write_lyrics_to_file('Chris Young', 100)

Found 100 songs by Chris Young
100 Chris Young songs written


In [36]:
write_lyrics_to_file('Jake Owen', 70)

Found 70 songs by Jake Owen
70 Jake Owen songs written


In [37]:
write_lyrics_to_file('Morgan Evans', 11)

Found 11 songs by Morgan Evans
11 Morgan Evans songs written


In [38]:
write_lyrics_to_file('Eli Young Band', 85)

Found 85 songs by Eli Young Band
85 Eli Young Band songs written


In [39]:
write_lyrics_to_file('Tyler Rich', 11)

Found 11 songs by Tyler Rich
11 Tyler Rich songs written


In [41]:
write_lyrics_to_file('Hank Williams', 120)

Found 120 songs by Hank Williams
120 Hank Williams songs written


In [42]:
write_lyrics_to_file('Granger Smith', 100)

Found 100 songs by Granger Smith
100 Granger Smith songs written


In [43]:
write_lyrics_to_file('Josh Turner', 60)

Found 60 songs by Josh Turner
60 Josh Turner songs written
