In [12]:
# importing libraries
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

import requests
import time
import csv

In [6]:
# scraping Shazam's Top 200 weekly songs in the U.S.
# function for song title, artist, rank and partial URL
def get_song_urls(chart_url, timeout=30):
    """Scrape a Shazam chart page and list every song link found on it.

    Each song is rendered as
    ``"<rank>. <title> by <artist>. Partial url: <href>"`` where ``rank`` is
    the 1-based position of the song link within the page. The same lines are
    written to ``songs.txt`` in the current working directory, one per line,
    replacing any existing file.

    Args:
        chart_url: URL of the chart page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list[str]: The formatted song lines, in page order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # The request is sent with a desktop-browser User-Agent header.
    hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    page = requests.get(chart_url, headers=hdr, timeout=timeout)
    # Stop on 4xx/5xx instead of parsing an error page and overwriting
    # songs.txt with an empty result.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'html.parser')

    # find all song containers
    songs = soup.find_all('a', class_='ContainerLink-module_containerLinkElement__EOJ6S')

    song_list = []
    for rank, song in enumerate(songs, start=1):
        # the song title lives in the link's aria-label attribute
        song_title = song.get('aria-label')

        # partial (site-relative) URL of the song page
        partial_url = song.get('href')

        # The artist name is taken from the next artist link after the song link.
        # NOTE(review): find_next searches forward through the rest of the
        # document, so an entry without its own artist link would pick up the
        # following entry's artist - confirm against the page markup.
        artist_container = song.find_next('a', attrs={'data-test-id': 'charts_userevent_list_artistName'})
        song_artist = artist_container.text if artist_container else "Unknown Artist"

        song_list.append(f"{rank}. {song_title} by {song_artist}. Partial url: {partial_url}")

    # The context manager closes the file even if a write fails part-way.
    with open('songs.txt', 'w', encoding='utf-8') as file:
        file.writelines(f"{song_info}\n" for song_info in song_list)

    return song_list

# Scrape the U.S. Top 200 chart. The returned list is displayed as this cell's
# output, and the same lines are written to songs.txt as a side effect.
get_song_urls('https://www.shazam.com/charts/top-200/united-states')

['1. ANXIETY (feat. Doechii) by Sleepy Hallow. Partial url: /song/1706412053/anxiety-feat-doechii',
 '2. luther by Kendrick Lamar & SZA. Partial url: /song/1781270323/luther',
 '3. NOKIA by Drake. Partial url: /song/1796127375/nokia',
 '4. Messy by Lola Young. Partial url: /song/1743250261/messy',
 '5. Anxiety by Doechii. Partial url: /song/1800052074/anxiety',
 '6. APT. by ROSÉ & Bruno Mars. Partial url: /song/1771105935/apt',
 '7. Die With A Smile by Lady Gaga & Bruno Mars. Partial url: /song/1792667027/die-with-a-smile',
 '8. Ordinary by Alex Warren. Partial url: /song/1793663645/ordinary',
 '9. DENIAL IS A RIVER by Doechii. Partial url: /song/1763934447/denial-is-a-river',
 '10. 30 For 30 (with Kendrick Lamar) by SZA. Partial url: /song/1786643047/30-for-30-with-kendrick-lamar',
 '11. ACTIN UP by Tommy Richman. Partial url: /song/1792976782/actin-up',
 '12. Timeless by The Weeknd & Playboi Carti. Partial url: /song/1793654640/timeless',
 '13. SOMEBODY LOVES ME by PARTYNEXTDOOR & Dr

In [1]:
# Scrape the "Featured In" section (albums and playlists) of each song's page
    
def _text_or(tag, fallback):
    """Return the stripped text of a parsed tag, or ``fallback`` if it is missing."""
    return tag.text.strip() if tag else fallback


def get_featured_in(song_url, wait_seconds=3):
    """List the albums/playlists shown on a Shazam song page.

    The page is loaded in a headless Chrome session, its HTML is parsed with
    BeautifulSoup, and one dictionary is produced for every item found inside
    the section container.

    Args:
        song_url: Full URL of the song page to load.
        wait_seconds: Fixed pause after navigation, before the page source is
            read.

    Returns:
        list[dict]: One ``{'type', 'title', 'owner'}`` dictionary per item.
        Fields whose element is missing fall back to ``"Unknown Type"``,
        ``"Unknown Title"`` and ``"Unknown Owner"``. An empty list is returned
        when the section is absent or when loading/parsing fails (a message is
        printed in both cases).

    Raises:
        Whatever ``webdriver.Chrome`` raises if the browser cannot be started;
        the driver is created outside the ``try`` block, so that failure is
        not swallowed.
    """
    # setup browser driver to run headless
    browser_options = Options()
    browser_options.add_argument("--headless")
    driver = webdriver.Chrome(options=browser_options)

    try:
        try:
            # get song page
            driver.get(song_url)

            # fixed pause before reading the page source
            time.sleep(wait_seconds)

            # get the html
            page = driver.page_source
        finally:
            # Always shut the browser down, even when navigation fails;
            # otherwise every failed song leaves a Chrome process behind.
            driver.quit()

        soup = BeautifulSoup(page, 'html.parser')

        # Narrow the search area to the "Featured In" section. A class_ string
        # containing a space is compared against the element's whole class
        # attribute, so both classes must appear in exactly this order.
        section = soup.find('div', class_='PageGrid-module_content__Ek7uQ SongPageContent_grayBg__Gv7kq')
        if section is None:
            # Report the real cause instead of tripping over None below.
            print(f"No 'Featured In' section found for {song_url}")
            return []

        featured_items = []
        for item in section.find_all('div', class_='MedialistInner-module_child__P5BSj'):
            featured_items.append({
                # type label (ALBUM/PLAYLIST)
                'type': _text_or(
                    item.find('span', class_='AlbumItem-module_isPlaylistLabel__sljzV'),
                    "Unknown Type",
                ),
                # title of the album or playlist
                'title': _text_or(
                    item.find('span', class_='Text-module_text-black__mkuUo'),
                    "Unknown Title",
                ),
                # owner/creator of the album or playlist
                'owner': _text_or(
                    item.find('span', class_='Text-module_text-gray-900__Qcj0F Text-module_fontFamily__cQFwR AlbumItem-module_creatorLabel__5YY3L Text-post-module_size-base__o144k Text-module_fontWeightNormal__kB6Wg Text-module_textOverflowEllipsis__J7BCo'),
                    "Unknown Owner",
                ),
            })

        return featured_items

    except Exception as e:
        # Best effort: one bad page must not abort a whole chart scrape.
        print(f"Error scraping {song_url}: {e}")
        return []

def scrape_shazam_top_200(timeout=30):
    """Scrape the Shazam U.S. Top 200 chart and each song's featured items into songs.csv.

    The chart page is downloaded once. For each of the first 200 song links,
    ``get_featured_in`` is called on the song's full URL and one CSV row is
    written per featured item. A song with no featured items still gets a
    single row with the three "Featured" columns left empty.

    ``songs.csv`` is written to the current working directory with the header
    ``Rank, Title, Artist, URL, Featured Type, Featured Title, Featured Owner``
    and replaces any existing file. One progress line is printed per song.

    Args:
        timeout: Seconds to wait for the chart page before giving up.

    Raises:
        requests.RequestException: If the chart request fails, times out, or
            returns an HTTP error status. In that case songs.csv is not
            touched.
    """
    # setup
    url = 'https://www.shazam.com/charts/top-200/united-states'
    hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

    # get main page
    page = requests.get(url, headers=hdr, timeout=timeout)
    # Do not go on to write a header-only CSV from an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'html.parser')

    songs = soup.find_all('a', class_='ContainerLink-module_containerLinkElement__EOJ6S')

    # The context manager closes (and flushes) the CSV even if a song lookup
    # raises half-way through the chart.
    with open('songs.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Rank', 'Title', 'Artist', 'URL', 'Featured Type', 'Featured Title', 'Featured Owner'])

        for rank, song in enumerate(songs[:200], start=1):
            title = song.get('aria-label')
            partial_url = song.get('href')
            full_url = f"https://www.shazam.com{partial_url}"
            artist_container = song.find_next('a', attrs={'data-test-id': 'charts_userevent_list_artistName'})
            artist = artist_container.text if artist_container else "Unknown Artist"

            print(f"Scraping {rank}: {title}")

            # get featured in data
            featured_items = get_featured_in(full_url)

            if featured_items:
                for item in featured_items:
                    writer.writerow([rank, title, artist, full_url, item['type'], item['title'], item['owner']])
            else:
                # keep the song in the output even without featured items
                writer.writerow([rank, title, artist, full_url, "", "", ""])

            time.sleep(1)  # pause between songs

    print("Finished scraping. Check songs.csv for results.")

# Run the full scrape: writes songs.csv and prints one progress line per song.
# This is slow - get_featured_in starts a headless Chrome session and sleeps
# for every song, and the loop adds a further pause between songs.
scrape_shazam_top_200()

Processing 1: ANXIETY (feat. Doechii)
Processing 2: luther
Processing 3: NOKIA
Processing 4: Messy
Processing 5: Anxiety
Processing 6: APT.
Processing 7: Die With A Smile
Processing 8: Ordinary
Processing 9: DENIAL IS A RIVER
Processing 10: 30 For 30 (with Kendrick Lamar)
Processing 11: ACTIN UP
Processing 12: Timeless
Processing 13: SOMEBODY LOVES ME
Processing 14: That’s So True
Processing 15: Pink Pony Club
Processing 16: Beautiful Things
Processing 17: Proud Of Me
Processing 18: tv off (feat. Lefty Gunplay)
Processing 19: Lose Control
Processing 20: Scared (Sped Up)
Processing 21: Somebody That I Used to Know (feat. Kimbra)
Processing 22: GIMME A HUG
Processing 23: ME JALO
Processing 24: when the party's over
Processing 25: Not Like Us
Processing 26: Bad Dreams
Processing 27: Headlock
Processing 28: Where Do We Go From Here (feat. Menahan Street Band)
Processing 29: Love Somebody
Processing 30: Spitting Off the Edge of the World (feat. Perfume Genius)
Processing 31: Sailor Song
Pro

In [10]:
# Combine songs.txt and songs.csv into a single sectioned .txt file
def create_sectioned_text_file(songs_txt_path='songs.txt',
                               songs_csv_path='songs.csv',
                               output_path='shazam_data.txt'):
    """Merge the song list and the featured-in CSV into one sectioned text file.

    The output holds two sections, each introduced by a banner of 80 ``=``
    characters around a title line:

    1. the content of the songs text file, copied verbatim;
    2. every row of the CSV file (header included), with its fields joined by
       tab characters, one row per line.

    Args:
        songs_txt_path: Text file holding the song list.
        songs_csv_path: CSV file holding the featured-in rows.
        output_path: File to create; an existing file is overwritten.

    Raises:
        OSError: If an input file cannot be read or the output cannot be
            written (e.g. ``FileNotFoundError`` when an input is missing).
    """
    # read the songs list as-is
    with open(songs_txt_path, 'r', encoding='utf-8') as txt_file:
        songs_section = txt_file.read()

    # Convert the CSV to tab-separated text. newline='' lets the csv module do
    # its own line-ending handling, as its documentation requires.
    with open(songs_csv_path, 'r', newline='', encoding='utf-8') as csv_file:
        features_section = "".join(
            "\t".join(row) + "\n" for row in csv.reader(csv_file)
        )

    banner = "=" * 80 + "\n"

    # create combined text file with sections
    with open(output_path, 'w', encoding='utf-8') as output_file:
        # write songs section
        output_file.write(banner)
        output_file.write("Section: Shazam's Weekly Top 200 U.S. Songs as of 3/10/2025 \n")
        output_file.write(banner + "\n")
        output_file.write(songs_section)
        output_file.write("\n\n")

        # write featured in section
        output_file.write(banner)
        output_file.write("Section: Every album or playlist the song is featured in \n")
        output_file.write(banner + "\n")
        output_file.write(features_section)

    print(f"Created {output_path} with multiple sections")

# Build shazam_data.txt. Both inputs must already exist: songs.txt is written
# by get_song_urls and songs.csv by scrape_shazam_top_200.
create_sectioned_text_file()

Created shazam_data.txt with multiple sections
