# Hot songs

## Import libraries

In [1]:
import pandas as pd
import requests
from random import randint
from bs4 import BeautifulSoup

## Billboard's Hot100 (US charts)
https://www.billboard.com/charts/hot-100/

In [2]:
# Download html.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops here on a 4xx/5xx response instead of letting an
# error page be parsed as if it were the chart.
response = requests.get('https://www.billboard.com/charts/hot-100/', timeout=30)
response.raise_for_status()

# Parse html (create the soup)
billboard = BeautifulSoup(response.content, 'html.parser')

Alternatively:

```python
with open('The Hot 100 – Billboard.html', encoding='UTF-8') as page:
    billboard = BeautifulSoup(page, 'html.parser')
```

Note that a saved copy is static: it reflects the chart as it was when the page was saved to the computer, not the current ranking.

In [3]:
# Song titles: the text of every `#title-of-a-story` element nested inside an
# `.o-chart-results-list__item` element, stripped and lower-cased.
billboard_title_tags = billboard.select('.o-chart-results-list__item #title-of-a-story')
billboard_songs = [tag.get_text(strip=True).lower() for tag in billboard_title_tags]
billboard_songs[:5]

["all too well (taylor's version)",
 'easy on me',
 'stay',
 'industry baby',
 'smokin out the window']

In [4]:
# Artist names: the text of every `.c-label.a-no-trucate` element nested inside
# an `.lrv-u-width-100p` element, stripped and lower-cased.
billboard_artist_tags = billboard.select('.lrv-u-width-100p .c-label.a-no-trucate')
billboard_artists = [tag.get_text(strip=True).lower() for tag in billboard_artist_tags]
billboard_artists[:5]

['taylor swift',
 'adele',
 'the kid laroi & justin bieber',
 'lil nas x & jack harlow',
 'silk sonic (bruno mars & anderson .paak)']

Alternatively:
```python
billboard_artists = [artist.get_text().strip() for artist in billboard.select('.lrv-u-width-100p .c-label:nth-child(2)')]
```

**Breaking down the list comprehensions**

`select` returns a list of all occurrences of the *selector*:
```python
billboard.select('selector')[0] # First item as example
```
```
>> <h3 class="c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 u-font-size-23@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-245 u-max-width-230@tablet-only u-letter-spacing-0028@tablet" id="title-of-a-story">
Easy On Me
</h3>
```

`.get_text()` returns only the text, ignoring the tags
```python
song.get_text()
```
```
>> '\nEasy On Me\n'
```

Setting `strip=True` removes the leading and trailing `\n` (new line character).

In [5]:
# Show the chart week in bold red ('\033[1m' switches to bold, '\033[91m' to red).
billboard_week = billboard.select('.c-tagline.a-font-primary-medium-xs')[0].get_text()
print('\033[1m' + '\033[91m' + billboard_week)

# One row per chart entry: titles and artists are paired by list position.
billboard_df = pd.DataFrame(dict(song=billboard_songs, artist=billboard_artists))

billboard_df.head()

[1m[91mWeek of November 27, 2021


Unnamed: 0,song,artist
0,all too well (taylor's version),taylor swift
1,easy on me,adele
2,stay,the kid laroi & justin bieber
3,industry baby,lil nas x & jack harlow
4,smokin out the window,silk sonic (bruno mars & anderson .paak)


## UK Charts
https://www.officialcharts.com/charts/singles-chart/

In [6]:
# Download the UK singles chart page.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops here on a 4xx/5xx response instead of letting an
# error page be parsed as if it were the chart.
response = requests.get('https://www.officialcharts.com/charts/singles-chart/', timeout=30)
response.raise_for_status()

# Parse the downloaded html
uk_charts = BeautifulSoup(response.content, 'html.parser')

As in the previous case, the page can instead be parsed from a saved copy:

```python
with open('Official Singles Chart Top 100 _ UK.html') as page:
    uk_charts = BeautifulSoup(page, 'html.parser')
```

In [7]:
# Song titles: the text of every `.title` element, stripped and lower-cased.
uk_title_tags = uk_charts.select('.title')
uk_songs = [tag.get_text(strip=True).lower() for tag in uk_title_tags]
uk_songs[:5]

['easy on me',
 'shivers',
 "all too well (taylor's version)",
 'bad habits',
 'flowers (say my name)']

In [8]:
# Artist names: the text of every `.artist` element, stripped and lower-cased.
uk_artist_tags = uk_charts.select('.artist')
uk_artists = [tag.get_text(strip=True).lower() for tag in uk_artist_tags]
uk_artists[:5]

['adele', 'ed sheeran', 'taylor swift', 'ed sheeran', 'arrdee']

In [9]:
# Show the chart's date line in bold red ('\033[1m' switches to bold, '\033[91m' to red).
uk_chart_dates = uk_charts.select('.article-date')[0].get_text(strip=True)
print('\033[1m' + '\033[91m' + uk_chart_dates)

# One row per chart entry: titles and artists are paired by list position.
uk_df = pd.DataFrame(dict(song=uk_songs, artist=uk_artists))
uk_df.head()

[1m[91m19 November 2021 -  25 November 2021


Unnamed: 0,song,artist
0,easy on me,adele
1,shivers,ed sheeran
2,all too well (taylor's version),taylor swift
3,bad habits,ed sheeran
4,flowers (say my name),arrdee


## Germany Charts
https://www.offiziellecharts.de/charts/single

In [10]:
# Download html.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops here on a 4xx/5xx response instead of letting an
# error page be parsed as if it were the chart.
response = requests.get('https://www.offiziellecharts.de/charts/single', timeout=30)
response.raise_for_status()

# Parse html (create the soup)
offizielle_soup = BeautifulSoup(response.content, 'html.parser')

In [11]:
# Song titles: the text of every `.info-title` element, lower-cased.
germany_title_tags = offizielle_soup.select('.info-title')
germany_songs = [tag.get_text().lower() for tag in germany_title_tags]
germany_songs[:5]

['der letzte song', 'shivers', 'cold heart', 'heat waves', 'do it to it']

In [12]:
# Artist names: the text of every `.info-artist` element, lower-cased.
germany_artist_tags = offizielle_soup.select('.info-artist')
germany_artists = [tag.get_text().lower() for tag in germany_artist_tags]
germany_artists[:5]

['kummer feat. fred rabe',
 'ed sheeran',
 'elton john & dua lipa',
 'glass animals',
 'acraze feat. cherish']

In [13]:
# One row per chart entry: titles and artists are paired by list position.
germany_df = pd.DataFrame(dict(song=germany_songs, artist=germany_artists))
germany_df.head()

Unnamed: 0,song,artist
0,der letzte song,kummer feat. fred rabe
1,shivers,ed sheeran
2,cold heart,elton john & dua lipa
3,heat waves,glass animals
4,do it to it,acraze feat. cherish


## The hot songs

In [14]:
# Stack the three chart tables into one DataFrame, keep only the first row seen
# for each song title, then renumber the rows from 0.
# NOTE(review): duplicates are detected on the 'song' column alone, so two
# different songs that share a title are collapsed into one row.
chart_frames = [billboard_df, uk_df, germany_df]
combined_df = (
    pd.concat(chart_frames)
    .drop_duplicates(subset='song')
    .reset_index(drop=True)
)
combined_df

Unnamed: 0,song,artist
0,all too well (taylor's version),taylor swift
1,easy on me,adele
2,stay,the kid laroi & justin bieber
3,industry baby,lil nas x & jack harlow
4,smokin out the window,silk sonic (bruno mars & anderson .paak)
...,...,...
220,in da getto,j balvin & skrillex
221,stimmen,musso
222,wasted love,ofenbach feat. lagique
223,mond,montez x badmómzjay
