# Scraping Metrolyrics
### PART 1

In [1]:
import requests
import pandas as pd
import re
from bs4 import BeautifulSoup

In [3]:
# Scrape all pages of search results for your musician, and save as a CSV file. Include the following fields:

# Song title
# URL
# Popularity
# Year
# Bonus: Make the popularity a normal number (e.g., 6)

In [250]:
def _parse_song_row(song) -> dict:
    """Pull the available fields out of one search-result table row.

    Extraction is best-effort: a field whose markup is missing or shaped
    differently than expected is simply left out of the returned dict, so it
    shows up as a missing value once the rows are turned into a DataFrame.
    """
    # Errors raised when a tag/attribute is absent (None access, missing key)
    # or when a regex finds nothing to index into.
    missing = (AttributeError, IndexError, KeyError, TypeError)
    row = {}
    try:
        link_text = song.find('td', class_=False).a.text
        row['title'] = re.findall(r'(.+) Lyrics', link_text)[0]
    except missing:
        pass
    try:
        row['url'] = song.a['href']
    except missing:
        pass
    try:
        row['year'] = song.find('td', class_=False).find_next_sibling('td').text
    except missing:
        pass
    try:
        # The second CSS class of the span looks like "popular<N>"; \d+ keeps
        # every digit instead of only the first one.
        row['popularity'] = re.findall(r'popular(\d+)', song.span['class'][1])[0]
    except missing:
        pass
    return row


def scrap_songs_by_artist(artist: str, total_pages: int) -> pd.DataFrame:
    """Scrape the alphabetical song listing pages of an artist.

    Parameters
    ----------
    artist : str
        Artist slug as it appears in the listing URL (e.g. ``'oasis'``).
    total_pages : int
        Number of listing pages to fetch, starting at page 1. Values below 1
        fetch nothing and yield an empty frame.

    Returns
    -------
    pandas.DataFrame
        One row per song with the columns ``title``, ``url``, ``year`` and
        ``popularity``. The columns are always present; fields that could not
        be extracted are missing values.

    Raises
    ------
    requests.HTTPError
        If a listing page answers with an error status.
    """
    rows = []
    for num in range(1, total_pages + 1):
        url = f'http://www.metrolyrics.com/{artist}-alpage-{num}.html'
        print('Scraping', url)
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        doc = BeautifulSoup(response.text, 'html.parser')
        table_body = doc.tbody
        if table_body is None:
            print('No song table found on', url)
            continue
        for song in table_body.find_all('tr', class_=False):
            rows.append(_parse_song_row(song))
    # Build the frame once, after the loop, so it exists even when no page
    # was fetched and always carries the same columns.
    return pd.DataFrame(rows, columns=['title', 'url', 'year', 'popularity'])


In [262]:
# Collect listing pages 1 through 3 for Oasis and preview the first rows.
oasis_songs = scrap_songs_by_artist(artist='oasis', total_pages=3)
oasis_songs.head(n=5)

Scraping http://www.metrolyrics.com/oasis-alpage-1.html
Scraping http://www.metrolyrics.com/oasis-alpage-2.html
Scraping http://www.metrolyrics.com/oasis-alpage-3.html


Unnamed: 0,popularity,title,url,year
0,0,(Get Off Your) High Horse Lady,http://www.metrolyrics.com/get-off-your-high-h...,2008
1,1,(I Got) The Fever,http://www.metrolyrics.com/i-got-the-fever-lyr...,2007
2,0,(Probably) All in the Mind,http://www.metrolyrics.com/probably-all-in-the...,2008
3,4,(You've Got) A Heart Of A Star,http://www.metrolyrics.com/youve-got-a-heart-o...,2007
4,1,A Bell Will Ring,http://www.metrolyrics.com/a-bell-will-ring-ly...,2005


In [268]:
# Put the columns in the export order, then write the table without the
# row index.
oasis_songs = oasis_songs.loc[:, ['title', 'year', 'url', 'popularity']]
oasis_songs.to_csv(path_or_buf='Metrolyrics_oasis_songs.csv', index=False)

### PART 2

In [270]:
# Metrolyrics, Part 2: Scrape the lyrics pages

# Then, open your search results csv, and scrape the following field:

# Lyrics
# Merge with your original song information and save as a new CSV file

# Tip: If you use .find for your lyrics, they'll have a bunch of ads inside! 
# You can use the ingredients/directions trick from above, or you can clean them with regex.

In [273]:
# Reload the song list saved in Part 1 and preview it.
df = pd.read_csv(filepath_or_buffer='Metrolyrics_oasis_songs.csv')
df.head(n=5)

Unnamed: 0,title,year,url,popularity
0,(Get Off Your) High Horse Lady,2008,http://www.metrolyrics.com/get-off-your-high-h...,0
1,(I Got) The Fever,2007,http://www.metrolyrics.com/i-got-the-fever-lyr...,1
2,(Probably) All in the Mind,2008,http://www.metrolyrics.com/probably-all-in-the...,0
3,(You've Got) A Heart Of A Star,2007,http://www.metrolyrics.com/youve-got-a-heart-o...,4
4,A Bell Will Ring,2005,http://www.metrolyrics.com/a-bell-will-ring-ly...,1


In [414]:
def get_lyrics(data):
    """Download one song's lyrics page and extract its title and verses.

    Parameters
    ----------
    data : mapping or pandas.Series
        One song record providing at least ``'title'`` and ``'url'``.

    Returns
    -------
    pandas.Series
        ``title``: the title parsed from the page heading, or ``data['title']``
        when the heading is absent or does not match the expected
        "<title> Lyrics ..." shape.
        ``verse``: the text of every verse element joined by newlines, or an
        empty string when the page has no lyrics container or no verses.

    Raises
    ------
    requests.HTTPError
        If the lyrics page answers with an error status.
    """
    print('Scraping lyrics of', data['title'])
    # A timeout keeps a stalled connection from hanging the whole apply() run.
    response = requests.get(data['url'], timeout=30)
    response.raise_for_status()
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    doc = BeautifulSoup(response.text, 'html.parser')

    # Fall back to the known title instead of failing on a page whose heading
    # is missing or worded differently.
    banner = doc.find(class_='banner-heading')
    heading = banner.h1 if banner is not None else None
    match = re.search(r'(.+) Lyrics.+', heading.text) if heading is not None else None
    title = match.group(1) if match else data['title']

    # A page without lyrics yields an empty verse rather than aborting the
    # scrape of every remaining song.
    body = doc.find('div', id='lyrics-body-text')
    verses = body.find_all(class_='verse') if body is not None else []
    if not verses:
        print('No lyrics found for', data['title'])

    return pd.Series({
        'title': title,
        'verse': '\n'.join(line.text for line in verses),
    })

In [416]:
# Call get_lyrics once per song row; each returned Series becomes a row of
# the resulting frame.
oasis_lyrics = df.apply(func=get_lyrics, axis='columns')

Scraping lyrics of (Get Off Your) High Horse Lady
Scraping lyrics of (Get Off Your) High Horse Lady
Scraping lyrics of (I Got) The Fever
Scraping lyrics of (Probably) All in the Mind
Scraping lyrics of (You've Got) A Heart Of A Star
Scraping lyrics of A Bell Will Ring
Scraping lyrics of A Quick Peep
Scraping lyrics of Acquiesce
Scraping lyrics of Ain't Got Nothin
Scraping lyrics of Alice
Scraping lyrics of Alive
Scraping lyrics of All Around The World
Scraping lyrics of All Around the World (Reprise)
Scraping lyrics of All in The Mind
Scraping lyrics of All You Need is Love
Scraping lyrics of Angel Child
Scraping lyrics of Bag It Up
Scraping lyrics of Be Here Now
Scraping lyrics of Better Let You Know
Scraping lyrics of Better Man
Scraping lyrics of Bonehead's Bank Holiday
Scraping lyrics of Born on a Different Cloud
Scraping lyrics of Bring It On Down
Scraping lyrics of Can You See It Now? (I Can See It Now)
Scraping lyrics of Carnation
Scraping lyrics of Carry Us All
Scraping lyrics 

In [420]:
# Attach the scraped lyrics to the song table by row index. Both frames have
# a 'title' column, so the scraped one gets a suffix and is then dropped.
oasis_df = (
    oasis_songs
    .join(oasis_lyrics, rsuffix='_scrapped')
    .drop(columns=['title_scrapped'])
)
oasis_df.head(n=5)

Unnamed: 0,title,year,url,popularity,verse
0,(Get Off Your) High Horse Lady,2008,http://www.metrolyrics.com/get-off-your-high-h...,0,"Get off your high horse Lady, I don't need a r..."
1,(I Got) The Fever,2007,http://www.metrolyrics.com/i-got-the-fever-lyr...,1,I'm tired of my life but my heads alright\nI g...
2,(Probably) All in the Mind,2008,http://www.metrolyrics.com/probably-all-in-the...,0,Show me who you are\nI'll show you what you lo...
3,(You've Got) A Heart Of A Star,2007,http://www.metrolyrics.com/youve-got-a-heart-o...,4,"You're never gonna get along, hanging out, thi..."
4,A Bell Will Ring,2005,http://www.metrolyrics.com/a-bell-will-ring-ly...,1,"A little space, a little time\nSee what it can..."


In [422]:
# Save the merged song + lyrics table without the row index.
oasis_df.to_csv(path_or_buf='oasis_lyrics.csv', index=False)

# Cage the Elephant toooo

In [424]:
# Collect the first listing page for Cage the Elephant.
cte = scrap_songs_by_artist(artist='cage-the-elephant', total_pages=1)

Scraping http://www.metrolyrics.com/cage-the-elephant-alpage-1.html


In [426]:
# Call get_lyrics once per song row of the Cage the Elephant listing.
cte_lyrics = cte.apply(func=get_lyrics, axis='columns')

Scraping lyrics of 2024
Scraping lyrics of 2024
Scraping lyrics of Aberdeen
Scraping lyrics of Aint No Rest For The Wicked
Scraping lyrics of Always Something
Scraping lyrics of Around My Head
Scraping lyrics of Back Against The Wall
Scraping lyrics of Back Stabbin' Betty
Scraping lyrics of Black Madonna
Scraping lyrics of Broken Boy
Scraping lyrics of Cigarette Daydreams
Scraping lyrics of Cold Cold Cold
Scraping lyrics of Come A Little Closer
Scraping lyrics of Cry Baby
Scraping lyrics of Dance Dance
Scraping lyrics of Dr. Dr. Dr.
Scraping lyrics of Drones In The Valley
Scraping lyrics of Flow
Scraping lyrics of Free Love
Scraping lyrics of Golden Brown
Scraping lyrics of Goodbye
Scraping lyrics of Halo
Scraping lyrics of House of Glass
Scraping lyrics of How Are You True
Scraping lyrics of In One Ear
Scraping lyrics of Indy Kidz
Scraping lyrics of Instant Crush
Scraping lyrics of It's Just Forever
Scraping lyrics of James Brown
Scraping lyrics of Japanese Buffalo
Scraping lyrics of 

In [429]:
# Save the scraped lyrics frame without the row index.
# NOTE(review): unlike the Oasis export, this writes only the frame returned
# by get_lyrics and is not joined with the song listing in `cte` — confirm
# that is intended.
cte_lyrics.to_csv(path_or_buf='cage_the_elephant_lyrics.csv', index=False)