In [3]:
import requests
from bs4 import BeautifulSoup

In [4]:
%pip install beautifulsoup4

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
def scrape_links(url):
    """Scrape the table rows of a page and attach each row's track link.

    Every ``<tr>`` that has at least one ``<td>`` carrying a ``class`` attribute
    becomes one dictionary mapping the cell's class name(s), joined with a
    space, to the cell's text. The row's track link (an ``href`` starting with
    ``/track/``) is stored under the extra key ``'table_link'``.

    Args:
        url: Address of the page to download.

    Returns:
        A list with one dict per table row, in page order. ``'table_link'`` is
        ``None`` for a row whose title cannot be matched to any track link.
        An empty list is returned when the server does not answer with
        status code 200.

    Raises:
        Whatever ``requests.get`` raises for connection problems or when the
        server does not respond within the timeout.
    """
    # A timeout keeps a stalled server from hanging the notebook forever.
    response = requests.get(url, timeout=30)

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # Page-wide fallback map (link text -> href) of every track link that has
    # visible text. A later link with the same text overwrites an earlier one.
    song_links = {}
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        title = anchor.get_text()
        if href and title and str(href).startswith('/track/'):
            song_links[str(title)] = str(href)

    table_data = []
    for tr in soup.find_all('tr'):
        row_data = {}
        for cell in tr.find_all('td'):
            class_name = cell.get('class')
            if class_name:
                # Join the list of class names to form a single string for the dictionary key
                row_data[' '.join(class_name)] = cell.get_text()
        if not row_data:
            # Rows without classed <td> cells (e.g. header rows) carry no data.
            continue

        song_title = row_data.get('table_name')
        row_link = None
        if song_title:
            # Prefer the track link that sits inside this very row, so two
            # songs sharing a title each keep their own link.
            for anchor in tr.find_all('a'):
                href = anchor.get('href')
                if (href and str(href).startswith('/track/')
                        and anchor.get_text() == song_title):
                    row_link = str(href)
                    break
        if row_link is None:
            # Fall back to the page-wide lookup by title; .get() yields None
            # instead of raising KeyError when nothing matches.
            row_link = song_links.get(song_title)

        row_data['table_link'] = row_link
        table_data.append(row_data)

    return table_data


In [6]:
# Scrape one songdata.io playlist page.
# URL of the playlist page to scrape
url = 'https://songdata.io/playlist/37i9dQZF1DX4JAvHpjipBk'

# scrape_links returns a list with one dict per table row (cell text keyed by
# the cell's CSS class, plus the track link under 'table_link'). The list is
# kept in `td`, which later cells extend and modify in place.
td = scrape_links(url)

In [7]:
# Print the scraped row dicts as plain text to inspect what was collected.
print(td)

[{'table_play': '1', 'table_img': '', 'table_name': 'What Was That', 'table_artist': 'Lorde', 'table_data': '2025-04-24', 'table_key': 'C Major', 'table_camelot': '8B', 'table_bpm': '168', 'table_energy': '6', 'table_link': '/track/2jNyiavSywmA472t2m6ZBz/What-Was-That-by-Lorde'}, {'table_play': '2', 'table_img': '', 'table_name': 'Mystical Magical', 'table_artist': 'Benson Boone', 'table_data': '2025-04-24', 'table_key': 'B Major', 'table_camelot': '1B', 'table_bpm': '120', 'table_energy': '7', 'table_link': '/track/221eHga9f9Ne4f8D7WVgPT/Mystical-Magical-by-Benson-Boone'}, {'table_play': '3', 'table_img': '', 'table_name': 'Is This Really Love?', 'table_artist': 'd4vd', 'table_data': '2025-04-25', 'table_key': 'A♭ Major', 'table_camelot': '4B', 'table_bpm': '160', 'table_energy': '7', 'table_link': '/track/0h2vooD5PFOykbPon6ShIZ/Is-This-Really-Love-by-d4vd'}, {'table_play': '4', 'table_img': '', 'table_name': 'Money On Money (feat. Future)', 'table_artist': 'Young Thug, Future', 'tabl

In [8]:
# List the title of every scraped row.
for row in td:
    print(row['table_name'])
# Observation: td only holds the rows of the first page of the table, even
# though the scraper also collected the track links that belong to the second page.


What Was That
Mystical Magical
Is This Really Love?
Money On Money (feat. Future)
ILYSMIH
Amen
At The Beach, In Every Life
Whenever
Down to be wrong
Brunette
PIDA LO QUE QUIERA MAMI - A COLORS SHOW
On Sight
Carousel
Stateside
Nostalgia
Peacefield - Single Version
BAD BITCHES!
Gripper (feat. T-Pain)
Pack It Up
Damocles
My First Heartbreak
Learn The Hard Way
Unshatter
T.H.
Poplife
Tell Me Where U Go
AMISTAD
Gimme Dat (feat. Wizkid)
Rich Guys (Feat. Babyfxce E)
What Da Fuk
Pretty Ugly
I could
buzzkill
Alē
Let Me Be Wrong
Sungazing - Electronic Mix
WASSUP
IMMA RIDE 4 YOU
Dead Body
What I Used To Get Into
Darkness Always Wins
I Don't Know Why (I Just Do)
Hitch Hiking
Dreamin
No Enemies (feat. Namasenda)
Roar of the Lion (The Lion's Pit)
Phone Ring (with Benny The Butcher and Maxo Kream)
MONDAY
Heart By Heart
Act Right
Lovesick Lullaby
On One
Bullet
Air (feat. Julia Michaels)
Same Shit
Ya Fue
Fleeting
SINCERITY
Spring
pretty years
Footprints
G.I.R.L.
Get Started
Love to Death
Dopamine and Je

In [9]:
def scrape_descriptions(song):
    """Attach the description found on a song's track page to the song dict.

    The page ``'https://songdata.io' + song['table_link']`` is downloaded and
    the text of its first ``<p>`` element is stored under
    ``song['table_descr']``. A song that already has a ``'table_descr'`` key is
    returned untouched without any request, so calling the function again only
    retries the songs that are still missing a description.

    Args:
        song: Dictionary describing one song. The site-relative track link is
            read from ``song['table_link']``. The dict is modified in place.

    Returns:
        The same ``song`` dictionary in every case. When the link is missing,
        the request does not answer with status code 200, or the page has no
        paragraph, a message is printed and ``song`` is returned without a
        ``'table_descr'`` key (so that a later call can retry it).

    Raises:
        Whatever ``requests.get`` raises for connection problems or when the
        server does not respond within the timeout.
    """
    if 'table_descr' in song:
        return song

    link = song.get('table_link')
    if not link:
        print(f"No track link available for {song.get('table_name')!r}; skipping.")
        return song

    url = 'https://songdata.io' + str(link)
    # A timeout keeps a stalled server from hanging the notebook forever.
    response = requests.get(url, timeout=30)

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        # Return the song itself (not a list) so callers that collect the
        # result always receive a dict.
        return song

    soup = BeautifulSoup(response.content, 'html.parser')
    paragraphs = soup.find_all('p')
    if not paragraphs:
        print(f"No description paragraph found at {url}")
        return song

    # The description is the text of the first paragraph on the page.
    song['table_descr'] = paragraphs[0].get_text()
    return song

In [10]:
# Try the description scraper on the first song only; the returned value is
# shown as the cell output.
scrape_descriptions(td[0])

{'table_play': '1',
 'table_img': '',
 'table_name': 'What Was That',
 'table_artist': 'Lorde',
 'table_data': '2025-04-24',
 'table_key': 'C Major',
 'table_camelot': '8B',
 'table_bpm': '168',
 'table_energy': '6',
 'table_link': '/track/2jNyiavSywmA472t2m6ZBz/What-Was-That-by-Lorde',
 'table_descr': '"What Was That" by Lorde had its release date on April 24, 2025. The duration of This song is about 3 minutes long, at 3:29. Based on our data, This song appears to be safe for all ages and is not considered explicit. This track is about the average length of a typical track. Since What Was That only contains only one song in the album, we like to consider this song as a single. Based on our data, New Zealand was the country where this track was produced or recorded. The popularity of What Was That is currently pretty popular right now. Although the tone can be danceable to some, this track does projects more of a negative sound rather than a postive one. '}

In [11]:
# Fetch and attach the description for every song in td (one HTTP request per
# song that does not have a 'table_descr' yet).
for song in td:
    scrape_descriptions(song)  # modifies the song dict in place

# Display the updated list.
td

[{'table_play': '1',
  'table_img': '',
  'table_name': 'What Was That',
  'table_artist': 'Lorde',
  'table_data': '2025-04-24',
  'table_key': 'C Major',
  'table_camelot': '8B',
  'table_bpm': '168',
  'table_energy': '6',
  'table_link': '/track/2jNyiavSywmA472t2m6ZBz/What-Was-That-by-Lorde',
  'table_descr': '"What Was That" by Lorde had its release date on April 24, 2025. The duration of This song is about 3 minutes long, at 3:29. Based on our data, This song appears to be safe for all ages and is not considered explicit. This track is about the average length of a typical track. Since What Was That only contains only one song in the album, we like to consider this song as a single. Based on our data, New Zealand was the country where this track was produced or recorded. The popularity of What Was That is currently pretty popular right now. Although the tone can be danceable to some, this track does projects more of a negative sound rather than a postive one. '},
 {'table_play': 

In [12]:
# Display td again to check the attached descriptions.
td

[{'table_play': '1',
  'table_img': '',
  'table_name': 'What Was That',
  'table_artist': 'Lorde',
  'table_data': '2025-04-24',
  'table_key': 'C Major',
  'table_camelot': '8B',
  'table_bpm': '168',
  'table_energy': '6',
  'table_link': '/track/2jNyiavSywmA472t2m6ZBz/What-Was-That-by-Lorde',
  'table_descr': '"What Was That" by Lorde had its release date on April 24, 2025. The duration of This song is about 3 minutes long, at 3:29. Based on our data, This song appears to be safe for all ages and is not considered explicit. This track is about the average length of a typical track. Since What Was That only contains only one song in the album, we like to consider this song as a single. Based on our data, New Zealand was the country where this track was produced or recorded. The popularity of What Was That is currently pretty popular right now. Although the tone can be danceable to some, this track does projects more of a negative sound rather than a postive one. '},
 {'table_play': 

In [13]:
# Scrape the Top-100 chart the same way. Judging by the printed output, its rows
# have fewer columns than the playlist rows (no 'table_data' / 'table_energy'),
# which is fine if we only want the names, links and descriptions.
top_100 = scrape_links('https://songdata.io/charts/top-100')
print(top_100)

[{'table_play': '1', 'table_img': '', 'table_name': 'luther (with sza)', 'table_artist': 'Kendrick Lamar, SZA', 'table_key': 'D Major', 'table_camelot': '10B', 'table_bpm': '138', 'table_link': '/track/45J4avUb9Ni0bnETYaYFVJ/luther-with-sza-by-Kendrick-Lamar-SZA'}, {'table_play': '2', 'table_img': '', 'table_name': 'NOKIA', 'table_artist': 'Drake', 'table_key': 'F Major', 'table_camelot': '7B', 'table_bpm': '106', 'table_link': '/track/2u9S9JJ6hTZS3Vf22HOZKg/NOKIA-by-Drake'}, {'table_play': '3', 'table_img': '', 'table_name': 'Die With A Smile', 'table_artist': 'Lady Gaga, Bruno Mars', 'table_key': 'F♯ Minor', 'table_camelot': '11A', 'table_bpm': '158', 'table_link': '/track/2plbrEY59IikOBgBGLjaoe/Die-With-A-Smile-by-Lady-Gaga-Bruno-Mars'}, {'table_play': '4', 'table_img': '', 'table_name': 'All The Way (feat. Bailey Zimmerman)', 'table_artist': 'BigXthaPlug, Bailey Zimmerman', 'table_key': 'D♭ Minor', 'table_camelot': '12A', 'table_bpm': '135', 'table_link': '/track/73kIKvg65QWieOKXIw

In [14]:
# Fetch the description of every Top-100 song and add the song to the td list.
for song in top_100:
    described = scrape_descriptions(song)
    # Keep only dict results: anything else (e.g. an empty list signalling a
    # failed request) would break later cells that call dict methods on every
    # entry of td. The identity check keeps a re-run of this cell from
    # appending the same song objects a second time.
    if isinstance(described, dict) and not any(described is existing for existing in td):
        td.append(described)

In [15]:
# Number of entries collected so far: the playlist rows plus everything the
# Top-100 loop above appended.
len(td)

200

In [16]:
# Drop the columns that carry no useful information (image cell and row number).
# Both pops pass a default so the cell can be re-run, and rows that never had
# one of the keys do not raise KeyError.
for row in td:
    row.pop('table_img', None)
    row.pop('table_play', None)
td

[{'table_name': 'What Was That',
  'table_artist': 'Lorde',
  'table_data': '2025-04-24',
  'table_key': 'C Major',
  'table_camelot': '8B',
  'table_bpm': '168',
  'table_energy': '6',
  'table_link': '/track/2jNyiavSywmA472t2m6ZBz/What-Was-That-by-Lorde',
  'table_descr': '"What Was That" by Lorde had its release date on April 24, 2025. The duration of This song is about 3 minutes long, at 3:29. Based on our data, This song appears to be safe for all ages and is not considered explicit. This track is about the average length of a typical track. Since What Was That only contains only one song in the album, we like to consider this song as a single. Based on our data, New Zealand was the country where this track was produced or recorded. The popularity of What Was That is currently pretty popular right now. Although the tone can be danceable to some, this track does projects more of a negative sound rather than a postive one. '},
 {'table_name': 'Mystical Magical',
  'table_artist': 'B

In [17]:
# Second pass over every song: scrape_descriptions leaves songs that already
# have a 'table_descr' key untouched, so this only requests pages for songs
# that are still missing a description.
for song in td:
    scrape_descriptions(song)

In [18]:
import pandas as pd

# One DataFrame row per song dict; a key that a song lacks becomes a missing
# value in that column.
df = pd.DataFrame(td) 
# Written relative to the current working directory. With pandas' default
# index=True the row index is also written as the first, unnamed column.
df.to_csv('Recent_Songs.csv')

In [19]:
# Scrape the 2024 chart page into its own list of row dicts.
top_2024 = scrape_links('https://songdata.io/charts/2024')

In [20]:
# For every 2024 chart song: attach its description, then drop the image and
# row-number columns. The None defaults make the pops safe when a key is absent.
for song in top_2024:
    scrape_descriptions(song)  # modifies the song dict in place
    song.pop('table_img',None)
    song.pop('table_play',None)

# Display the cleaned list.
top_2024

[{'table_name': 'BIRDS OF A FEATHER',
  'table_artist': 'Billie Eilish',
  'table_key': 'G Major',
  'table_camelot': '9B',
  'table_bpm': '110',
  'table_link': '/track/6dOtVTDdiauQNBQEDOtlAB/BIRDS-OF-A-FEATHER-by-Billie-Eilish',
  'table_descr': 'Billie Eilish\'s \' "BIRDS OF A FEATHER" was released on its scheduled release date, May 17, 2024. With this song being around four minutes long, at 3:30, the duration of this song is pretty average compared to other songs. This track is safe for children and doesn\'t appear to contain any foul language, since the "Explicit" tag was not present in this track. The track order of this song in Billie Eilish\'s "HIT ME HARD AND SOFT" album is number 4 out of 10. On top of that, United States appears to be the country where this track was created. In terms of popularity, BIRDS OF A FEATHER is currently one of the hottest tracks right now. Even with the track produces more of a neutral energy, it is pretty danceable compared to others.'},
 {'table