# Instructions - Scraping popular songs
Your product will take a song as input from the user and will output another song (the recommendation). In most cases, the recommended song will have to be similar to the input song, but the CTO thinks that if the input song is currently on the top charts, the user will enjoy a recommendation more if it is also a song that is popular at the moment. <br>
<br>
You have to find data on the internet about currently popular songs. Billboard maintains a weekly Top 100 of "hot" songs here: https://www.billboard.com/charts/hot-100.<br>
<br>
It's a good place to start! Scrape the current top 100 songs and their respective artists, and put the information into a pandas dataframe.

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re

In [2]:
# Download the Billboard Hot 100 chart page.
url = "https://www.billboard.com/charts/hot-100"
# requests applies no timeout by default, so without one this cell could
# block forever if the server never answers.
response = requests.get(url, timeout=30)
# Displayed as the cell output; 200 means the page was fetched successfully.
response.status_code

200

In [3]:
# Parse the downloaded page bytes with Python's built-in HTML parser.
soup = BeautifulSoup(markup=response.content, features="html.parser")
# Uncomment to inspect the parsed markup:
# print(soup.prettify())

In [None]:
# soup.find_all("div", attrs={"class": "o-chart-results-list-row-container"}) 

In [7]:
# The number-one song's heading carries its own set of CSS classes, so it is
# matched separately from the other chart entries.
cls = (
    "c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 u-font-size-23@tablet "
    "lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis "
    "u-max-width-245 u-max-width-230@tablet-only u-letter-spacing-0028@tablet"
)

# Preview every <h3> whose class attribute matches that string.
soup.find_all(name="h3", attrs={"class": cls})

[<h3 class="c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 u-font-size-23@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-245 u-max-width-230@tablet-only u-letter-spacing-0028@tablet" id="title-of-a-story">
 
 	
 	
 		
 					Last Night		
 	
 </h3>]

In [8]:
# Start the titles list with the raw text of the first matching heading.
first_title_tag = soup.find("h3", attrs={"class": cls})
titles = [first_title_tag.get_text()]
titles

['\n\n\t\n\t\n\t\t\n\t\t\t\t\tLast Night\t\t\n\t\n']

In [10]:
# Class string used by the title headings of the remaining chart entries.
cls = (
    "c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 lrv-u-font-size-18@tablet "
    "lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis "
    "u-max-width-330 u-max-width-230@tablet-only"
)

# Preview every <h3> whose class attribute matches that string.
soup.find_all(name="h3", attrs={"class": cls})

[<h3 class="c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 lrv-u-font-size-18@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-330 u-max-width-230@tablet-only" id="title-of-a-story">
 
 	
 	
 		
 					Flowers		
 	
 </h3>,
 <h3 class="c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 lrv-u-font-size-18@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-330 u-max-width-230@tablet-only" id="title-of-a-story">
 
 	
 	
 		
 					Fast Car		
 	
 </h3>,
 <h3 class="c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 lrv-u-font-size-18@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-330 u-max-width-230@tablet-only" id="title-of-a-story">
 
 	
 	
 		
 					Calm Down		
 	
 </h3>,
 <h3 class="c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 lrv-u-font-size-18@tabl

In [12]:
# Append the raw text of every remaining title heading to `titles`.
# The document is searched once and the matches are reused, instead of
# re-running find_all for the count and again on every loop iteration.
remaining_title_tags = soup.find_all("h3", attrs={"class": cls})
titles.extend(tag.get_text() for tag in remaining_title_tags)

titles

['\n\n\t\n\t\n\t\t\n\t\t\t\t\tLast Night\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tFlowers\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tFast Car\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tCalm Down\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tAll My Life\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tFavorite Song\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tKill Bill\t\t\n\t\n',
 "\n\n\t\n\t\n\t\t\n\t\t\t\t\tCreepin'\t\t\n\t\n",
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tKarma\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tElla Baila Sola\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tSure Thing\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tAnti-Hero\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tDie For You\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tSomething In The Orange\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tSnooze\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tLa Bebe\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tWhere She Goes\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tUn x100to\t\t\n\t\n',
 '\n\n\t\n\t\n\t\t\n\t\t\t\t\tNeed A Favor

In [13]:
# Keep only the first 100 entries, then drop the carriage returns, newlines
# and tabs that surround each title in the page markup.
layout_chars = re.compile(r'[\r\n\t]')
titles = [layout_chars.sub('', raw_title) for raw_title in titles[:100]]
titles

['Last Night',
 'Flowers',
 'Fast Car',
 'Calm Down',
 'All My Life',
 'Favorite Song',
 'Kill Bill',
 "Creepin'",
 'Karma',
 'Ella Baila Sola',
 'Sure Thing',
 'Anti-Hero',
 'Die For You',
 'Something In The Orange',
 'Snooze',
 'La Bebe',
 'Where She Goes',
 'Un x100to',
 'Need A Favor',
 'Search & Rescue',
 'You Proof',
 "Thinkin' Bout Me",
 'Chemical',
 'Cupid',
 'Rock And A Hard Place',
 'Eyes Closed',
 "Boy's A Liar, Pt. 2",
 'Next Thing You Know',
 'Put It On Da Floor Again',
 'Thought You Should Know',
 "I'm Good (Blue)",
 'Dance The Night',
 'Area Codes',
 "Dancin' In The Country",
 'One Thing At A Time',
 'Memory Lane',
 'Bzrp Music Sessions, Vol. 55',
 'Tennessee Orange',
 'Cruel Summer',
 'TQM',
 'Stand By Me',
 'Religiously',
 'Dial Drunk',
 'Under The Influence',
 'Players',
 'Calling',
 'Annihilate',
 'Take Two',
 'Love You Anyway',
 'Thank God',
 'Am I Dreaming',
 'Princess Diana',
 'Bye',
 'Self Love',
 'It Matters To Her',
 'Daylight',
 'PRC',
 'Por Las Noches',
 'Mou

In [14]:
# Sanity check: after trimming, the list should hold exactly 100 titles.
len(titles)

100

In [15]:
# Class string used by the artist label of the number-one song, which differs
# from the one used by the other chart entries.
cls = (
    "c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max "
    "u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 "
    "u-max-width-230@tablet-only u-font-size-20@tablet"
)

# Preview every <span> whose class attribute matches that string.
soup.find_all(name="span", attrs={"class": cls})

[<span class="c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only u-font-size-20@tablet">
 	
 	Morgan Wallen
 </span>]

In [16]:
# Start the artists list with the raw text of the first matching label.
first_artist_tag = soup.find("span", attrs={"class": cls})
artists = [first_artist_tag.get_text()]
artists

['\n\t\n\tMorgan Wallen\n']

In [17]:
# Rest of the artists: class string used by the artist labels of the
# remaining chart entries.
cls = (
    "c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max "
    "u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 "
    "u-max-width-230@tablet-only"
)

# Search the document once and reuse the matches, instead of re-running
# find_all for the count and again on every loop iteration.
remaining_artist_tags = soup.find_all("span", attrs={"class": cls})
artists.extend(tag.get_text() for tag in remaining_artist_tags)

In [18]:
# Keep only the first 100 entries, then drop the carriage returns, newlines
# and tabs that surround each artist name in the page markup.
layout_chars = re.compile(r'[\r\n\t]')
artists = [layout_chars.sub('', raw_artist) for raw_artist in artists[:100]]
artists

['Morgan Wallen',
 'Miley Cyrus',
 'Luke Combs',
 'Rema & Selena Gomez',
 'Lil Durk Featuring J. Cole',
 'Toosii',
 'SZA',
 'Metro Boomin, The Weeknd & 21 Savage',
 'Taylor Swift Featuring Ice Spice',
 'Eslabon Armado X Peso Pluma',
 'Miguel',
 'Taylor Swift',
 'The Weeknd & Ariana Grande',
 'Zach Bryan',
 'SZA',
 'Yng Lvcas x Peso Pluma',
 'Bad Bunny',
 'Grupo Frontera X Bad Bunny',
 'Jelly Roll',
 'Drake',
 'Morgan Wallen',
 'Morgan Wallen',
 'Post Malone',
 'Fifty Fifty',
 'Bailey Zimmerman',
 'Ed Sheeran',
 'PinkPantheress & Ice Spice',
 'Jordan Davis',
 'Latto Featuring Cardi B',
 'Morgan Wallen',
 'David Guetta & Bebe Rexha',
 'Dua Lipa',
 'Kali',
 'Tyler Hubbard',
 'Morgan Wallen',
 'Old Dominion',
 'Bizarrap & Peso Pluma',
 'Megan Moroney',
 'Taylor Swift',
 'Fuerza Regida',
 'Lil Durk Featuring Morgan Wallen',
 'Bailey Zimmerman',
 'Noah Kahan',
 'Chris Brown',
 'Coi Leray',
 'Metro Boomin, Swae Lee & NAV Featuring A Boogie Wit da Hoodie',
 'Metro Boomin, Swae Lee, Lil Wayne &

In [19]:
# Sanity check: after trimming, the list should hold exactly 100 artists.
len(artists)

100

In [20]:
# Combine the two scraped lists into a table with one row per chart entry.
songs = pd.DataFrame(data=dict(title=titles, artist=artists))
songs

Unnamed: 0,title,artist
0,Last Night,Morgan Wallen
1,Flowers,Miley Cyrus
2,Fast Car,Luke Combs
3,Calm Down,Rema & Selena Gomez
4,All My Life,Lil Durk Featuring J. Cole
...,...,...
95,Save Me,Jelly Roll With Lainey Wilson
96,Yandel 150,Yandel & Feid
97,Beso,Rosalia & Rauw Alejandro
98,I Wrote The Book,Morgan Wallen
