In [1]:
# 1. import libraries

from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd 

# 2. find the url and store it in a variable

url = "https://www.billboard.com/charts/hot-100"

# 3. download html with a get request

response = requests.get(url)

In [2]:
# get the status code response

response.status_code

200

In [3]:
soup = BeautifulSoup(response.content, 'html.parser')

In [4]:
# prettify the html code within the soup variable

print(soup.prettify())

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [5]:
# this will select the name of the first song

soup.select("span.chart-element__information__song")[0].text

'Save Your Tears'

In [6]:
# this will select the name of the first song as well

soup.select("li.chart-list__element:nth-child(1) > button:nth-child(1) > span:nth-child(2) > span:nth-child(1)")[0].text 

'Save Your Tears'

In [7]:
# this will select the name of all songs

soup.select("span.chart-element__information__song")

[<span class="chart-element__information__song text--truncate color--primary">Save Your Tears</span>,
 <span class="chart-element__information__song text--truncate color--primary">Leave The Door Open</span>,
 <span class="chart-element__information__song text--truncate color--primary">Peaches</span>,
 <span class="chart-element__information__song text--truncate color--primary">Rapstar</span>,
 <span class="chart-element__information__song text--truncate color--primary">Levitating</span>,
 <span class="chart-element__information__song text--truncate color--primary">Kiss Me More</span>,
 <span class="chart-element__information__song text--truncate color--primary">Montero (Call Me By Your Name)</span>,
 <span class="chart-element__information__song text--truncate color--primary">Astronaut In The Ocean</span>,
 <span class="chart-element__information__song text--truncate color--primary">Up</span>,
 <span class="chart-element__information__song text--truncate color--primary">Drivers License

In [8]:
# but we want to select only the name of the artist with the No.1 Song in the chart

soup.select("span.chart-element__information__artist")[0].text

'The Weeknd & Ariana Grande'

In [9]:
soup.select("span.chart-element__rank__number")[3].text

'4'

In [10]:
song_lst = soup.select("span.chart-element__information__song")

In [11]:
# how many songs are in a single page?

len(song_lst)

100

In [12]:
# create empty lists and name them title, artist and rank

title = []
artist = []
rank = []

len_songs = len(song_lst)

# iterate through each index and append into the empty lists
for i in range(len_songs):
    title.append(soup.select("span.chart-element__information__song")[i].text)
    artist.append(soup.select("span.chart-element__information__artist")[i].text)
    rank.append(soup.select("span.chart-element__rank__number")[i].text)


In [13]:
# create a DataFrame 
# with 'title', 'artist' and 'rank' as keys
# and use the lists we create in previous cell as values

songs = pd.DataFrame({'title' : title,
                      'artist' : artist,
                      'rank' : rank })


In [14]:
# inspect the newly created DataFrame a bit
# so we can see as above
# Rank 1 Artist is The Weeknd and Ariana Grande
# And that the No.1 Song Title is Save Your Tears

songs

Unnamed: 0,title,artist,rank
0,Save Your Tears,The Weeknd & Ariana Grande,1
1,Leave The Door Open,Silk Sonic (Bruno Mars & Anderson .Paak),2
2,Peaches,Justin Bieber Featuring Daniel Caesar & Giveon,3
3,Rapstar,Polo G,4
4,Levitating,Dua Lipa Featuring DaBaby,5
...,...,...,...
95,4 Da Gang,42 Dugg & Roddy Ricch,96
96,Blame It On You,Jason Aldean,97
97,Wasted On You,Morgan Wallen,98
98,Way Less Sad,AJR,99


In [15]:
songs.shape

# 100 rows and 3 columns

(100, 3)

In [16]:
# let's try the describe() function just to see what happens

songs.describe()

Unnamed: 0,title,artist,rank
count,100,100,100
unique,100,90,100
top,Heartbreak Anniversary,Moneybagg Yo,16
freq,1,6,1


In [17]:
# let's try the .info() function just to see what happens

songs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   title   100 non-null    object
 1   artist  100 non-null    object
 2   rank    100 non-null    object
dtypes: object(3)
memory usage: 2.5+ KB


In [18]:
# now we can extract DataFrame into a .csv file

songs.to_csv("billboard_top100.csv")


#### Web Scraping multiple pages

In [19]:
data = pd.read_csv("billboard_top100.csv")

In [20]:
# import this library for "for loops" bar progress 

from tqdm import tqdm

In [21]:
# check the head
# "Unnamed:0" is irrelevant, we will drop it 
data.head(5)

Unnamed: 0.1,Unnamed: 0,title,artist,rank
0,0,Save Your Tears,The Weeknd & Ariana Grande,1
1,1,Leave The Door Open,Silk Sonic (Bruno Mars & Anderson .Paak),2
2,2,Peaches,Justin Bieber Featuring Daniel Caesar & Giveon,3
3,3,Rapstar,Polo G,4
4,4,Levitating,Dua Lipa Featuring DaBaby,5


In [22]:
# create a copy and store under df

df = data

In [23]:
# drop "Unnamed" column as we already have an index

df.drop(["Unnamed: 0"], axis = 1, inplace = True)


In [24]:
# check the df

df.head(5)

Unnamed: 0,title,artist,rank
0,Save Your Tears,The Weeknd & Ariana Grande,1
1,Leave The Door Open,Silk Sonic (Bruno Mars & Anderson .Paak),2
2,Peaches,Justin Bieber Featuring Daniel Caesar & Giveon,3
3,Rapstar,Polo G,4
4,Levitating,Dua Lipa Featuring DaBaby,5


In [25]:
# turn the song names into lowercase
# in case people type with lowercase to avoid input errors later


df["title"] = df["title"].str.lower()

In [26]:
# get the unique song names and convert the Series into a list
# store them under hot100_lst variable

hot100_lst = list(df['title'].unique())
hot100_lst


['save your tears',
 'leave the door open',
 'peaches',
 'rapstar',
 'levitating',
 'kiss me more',
 'montero (call me by your name)',
 'astronaut in the ocean',
 'up',
 'drivers license',
 'blinding lights',
 'deja vu',
 'beat box',
 'calling my phone',
 'on me',
 'best friend',
 'heartbreak anniversary',
 'beautiful mistakes',
 'what you know bout love',
 'mood',
 "my ex's best friend",
 'back in blood',
 'without you',
 'the good ones',
 'go crazy',
 'you broke me first.',
 '34+35',
 'hell of a view',
 'forever after all',
 "what's next",
 'time today',
 'therefore i am',
 'dakiti',
 'positions',
 'shottas (lala)',
 'solid',
 'track star',
 'ski',
 'wants and needs',
 "you're mines still",
 "we're good",
 'heat waves',
 'made for you',
 'good days',
 'for the night',
 "breaking up was easy in the 90's",
 'no more parties',
 'goosebumps',
 'telepatia',
 'tombstone',
 'streets',
 'go!',
 'hard for the next',
 'if pain was a person',
 "what's your country song",
 'just the way',
 'hold

In [27]:
# import random built-in library from python

import random

In [28]:
# create an input


song = input("Put a name of song: ") 

Put a name of song: Rapstar


In [29]:
# build a for loop that recommends a song that is from hot100_lst
# 

for song in hot100_lst:
    if song in hot100_lst:
        #recommend another randomg song from hot100_lst
        print("recommend this: ", random.choice(hot100_lst))
    else:
        print("Song is not in the hotlist")

recommend this:  drivers license
recommend this:  famous friends
recommend this:  heat waves
recommend this:  beautiful mistakes
recommend this:  my head and my heart
recommend this:  for the night
recommend this:  good days
recommend this:  no more parties
recommend this:  34+35
recommend this:  ski
recommend this:  just say det
recommend this:  street runner
recommend this:  what you know bout love
recommend this:  drunk (and i don't wanna go home)
recommend this:  made for you
recommend this:  hell of a view
recommend this:  blame it on you
recommend this:  blame it on you
recommend this:  heartbreak anniversary
recommend this:  calling my phone
recommend this:  rapstar
recommend this:  track star
recommend this:  beautiful mistakes
recommend this:  you broke me first.
recommend this:  goosebumps
recommend this:  made for you
recommend this:  hard for the next
recommend this:  positions
recommend this:  goosebumps
recommend this:  breaking up was easy in the 90's
recommend this:  te

In [30]:
# define a function for the recommendations of top100

def recommendations(input):
    if song.lower() in hot100_lst:
        print("recommend: ", random.choice(hot100_lst))
    else:
        print("Song not in a hotlist")

In [31]:
# import difflib

import difflib

In [33]:
# test1
# create the same input again

song = input("Name a song: ")

Name a song: rapstar


In [34]:
invalid_inputs = [' ', '']

if song in hot100_lst:
    recommendations(song)
elif song in invalid_inputs:
    print('This is not a valid input. Please enter a song title.')
else:
    if difflib.get_close_matches(song, hot100_lst) == []:
        print("Song not in a hotlist")
    else:
        print('Ups, smth went wrong, did you mean:', difflib.get_close_matches(song, hot100_lst))


recommend:  blinding lights


In [35]:
recommendations(song)

recommend:  hard for the next


In [36]:
#if song in hot100_lst:
    #recommendation = random.choice(hot100_lst)
    #if recommendation != song: # make sure we do not recommend the same song twice
        #print("recommended: ", recommendation)
    #else: 
        #print("recommend: ", randomndom.choice(host100_lst))
#else:
    #print("Song not in the hotlist")