In [15]:
import pandas as pd
import requests
import re

from bs4 import BeautifulSoup

In [16]:
rows = []
for page_num in range(0,5):
    
    myurl = f'http://www.metrolyrics.com/rem-alpage-{page_num}.html'
    response = requests.get(myurl)
    doc = BeautifulSoup(response.text)
   
    songs = doc.find('tbody').find_all('tr')

    for song in songs:  
        row = {}
        row['Title'] = song.find_all('td')[1].text
        row['URL'] = song.find_all('td')[1].find('a')['href']
        try:
            row['Year'] = song.find_all('td')[-2].text
        except:
            pass
        try:
            row['Popularity'] = song.find('span')['class']
        except:
            pass
        rows.append(row)

In [17]:
rows

[{'Title': "\n\nIt's The End Of The World Lyrics\n\n",
  'URL': 'http://www.metrolyrics.com/its-the-end-of-the-world-lyrics-rem.html',
  'Year': '2008',
  'Popularity': ['bar', 'popular10']},
 {'Title': '\n\nLosing My Religion Lyrics\n\n',
  'URL': 'http://www.metrolyrics.com/losing-my-religion-lyrics-rem.html',
  'Year': '2003',
  'Popularity': ['bar', 'popular9']},
 {'Title': '\n\nThe One I Love Lyrics\n\n',
  'URL': 'http://www.metrolyrics.com/the-one-i-love-lyrics-rem.html',
  'Year': '2008',
  'Popularity': ['bar', 'popular9']},
 {'Title': '\n\nSelingkar Kasih Lyrics\n\n',
  'URL': 'http://www.metrolyrics.com/selingkar-kasih-lyrics-rem.html',
  'Year': '2010',
  'Popularity': ['bar', 'popular7']},
 {'Title': '\n\nMan On The Moon Lyrics\n\n',
  'URL': 'http://www.metrolyrics.com/man-on-the-moon-lyrics-rem.html',
  'Year': '2003',
  'Popularity': ['bar', 'popular7']},
 {'Title': '\n\nThe Sidewinder Sleeps Tonight Lyrics\n\n',
  'URL': 'http://www.metrolyrics.com/the-sidewinder-sleep

In [18]:
df = pd.DataFrame(rows)

In [19]:
df = df[['Title','Year','URL','Popularity']]

In [20]:
df['Title'] = df['Title'].str.replace("\n", "")
df['Title'] = df['Title'].str.replace("Lyrics", "")
df['URL'] = df.URL.str.replace("http://www.metrolyrics.com", "")


In [21]:
df['Popularity'] = df['Popularity'].astype(str)
df['Popularity'] = df['Popularity'].str.extract(r"(\d+)")

In [22]:
df.head()

Unnamed: 0,Title,Year,URL,Popularity
0,It's The End Of The World,2008,/its-the-end-of-the-world-lyrics-rem.html,10
1,Losing My Religion,2003,/losing-my-religion-lyrics-rem.html,9
2,The One I Love,2008,/the-one-i-love-lyrics-rem.html,9
3,Selingkar Kasih,2010,/selingkar-kasih-lyrics-rem.html,7
4,Man On The Moon,2003,/man-on-the-moon-lyrics-rem.html,7


In [23]:
df.to_csv("song_info.csv", index = False)

### Part II

In [24]:
def scrape_page(row):
    try:
        myurl = f"http://www.metrolyrics.com{row['URL']}"
        response = requests.get(myurl)
        doc = BeautifulSoup(response.text)

        page = {}

        lyrics = doc.find(class_="lyrics-body")
        verses = lyrics.find_all(class_="verse")
        page['verses'] = "\n".join([verse.text for verse in verses]) 

        return pd.Series(page)
    
    except:
        return pd.Series({})
    

In [25]:
scrape_page(df.loc[2])

verses    This one goes out to the one I love\nThis one ...
dtype: object

In [26]:
scraped_df = df.apply(scrape_page,axis=1)

In [29]:
scraped_df = df.merge(scraped_df, left_index=True, right_index=True)
scraped_df

Unnamed: 0,Title_x,Year_x,URL_x,Popularity_x,Title_y,Year_y,URL_y,Popularity_y,verses
0,It's The End Of The World,2008,/its-the-end-of-the-world-lyrics-rem.html,10,It's The End Of The World,2008,/its-the-end-of-the-world-lyrics-rem.html,10,"That's great, it starts with an earthquake\nBi..."
1,Losing My Religion,2003,/losing-my-religion-lyrics-rem.html,9,Losing My Religion,2003,/losing-my-religion-lyrics-rem.html,9,"Oh life, it's bigger\nIt's bigger than you\nAn..."
2,The One I Love,2008,/the-one-i-love-lyrics-rem.html,9,The One I Love,2008,/the-one-i-love-lyrics-rem.html,9,This one goes out to the one I love\nThis one ...
3,Selingkar Kasih,2010,/selingkar-kasih-lyrics-rem.html,7,Selingkar Kasih,2010,/selingkar-kasih-lyrics-rem.html,7,Seharusnya cinta kita bisa tersenyum\nSeolah s...
4,Man On The Moon,2003,/man-on-the-moon-lyrics-rem.html,7,Man On The Moon,2003,/man-on-the-moon-lyrics-rem.html,7,"and the game of Life\nYeah, yeah, yeah, yeah\n..."
5,The Sidewinder Sleeps Tonight,2006,/the-sidewinder-sleeps-tonight-lyrics-rem.html,7,The Sidewinder Sleeps Tonight,2006,/the-sidewinder-sleeps-tonight-lyrics-rem.html,7,This here is the place i will be staying.\nthe...
6,Nightswimming,2003,/nightswimming-lyrics-rem.html,6,Nightswimming,2003,/nightswimming-lyrics-rem.html,6,Nightswimming deserves a quiet night\nThe phot...
7,Everybody Hurts,2003,/everybody-hurts-lyrics-rem.html,6,Everybody Hurts,2003,/everybody-hurts-lyrics-rem.html,6,"When your day is long\nAnd the night, the nigh..."
8,Fall On Me,2011,/fall-on-me-lyrics-rem.html,6,Fall On Me,2011,/fall-on-me-lyrics-rem.html,6,"There's a problem, feathers, iron\nBargain bui..."
9,Orange Crush,2003,/orange-crush-lyrics-rem.html,5,Orange Crush,2003,/orange-crush-lyrics-rem.html,5,"(Follow me, don't follow me)\nI've got my spin..."


In [28]:
scraped_df.to_csv("lyrics_merged.csv", index=False)