In [120]:
import csv
import logging
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [121]:
def _clean_label(text):
    """Strip the '»' link marker and surrounding whitespace from a tag's text."""
    # Remove the marker first so whitespace that sat between the label and
    # the marker is trimmed as well.
    return text.replace('»', '').strip()


def _preview_items(html):
    """Return the <div> tags carrying a preview_url attribute inside the 'canvas' div.

    Returns an empty list when the page has no 'canvas' container.
    """
    canvas = BeautifulSoup(html, 'html.parser').find('div', class_='canvas')
    if canvas is None:
        return []
    return canvas.find_all('div', {'preview_url': True})


def scrape_song(url, csv_path='songs.csv', log_path='logs/url_scrape.txt', timeout=30):
    """Scrape song preview URLs from an index page and the genre pages it links to.

    The index page at ``url`` is fetched and every ``<div preview_url=...>``
    inside its 'canvas' div is treated as a genre entry. For each entry that
    has an ``<a class="navlink">`` with an ``href``, the linked page is fetched
    and every ``<div preview_url=...>`` inside its 'canvas' div is recorded as
    a song: one row per song is appended to the CSV file with the columns
    ``song_id``, ``song_url``, ``genre``, ``artist`` and ``title``.

    Args:
        url: Address of the index page; genre links are resolved against it.
        csv_path: CSV file the rows are appended to. The header is written
            only when the file is empty.
        log_path: File that receives error messages. Its parent directory is
            created if missing. Note that ``logging.basicConfig`` has no
            effect when the root logger is already configured.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        list: The ``preview_url`` value of every song found, in page order.
        Empty when the index page does not answer with status 200.

    Raises:
        requests.RequestException: If the request for the index page itself
            fails (connection error, timeout). Failures on individual genre
            pages are logged and skipped instead.
    """
    song_urls = []

    # basicConfig opens the log file immediately, so its directory must exist.
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    logging.basicConfig(filename=log_path, level=logging.ERROR, format='%(asctime)s - %(message)s')

    fieldnames = ['song_id', 'song_url', 'genre', 'artist', 'title']

    # One session reuses the connection across the many same-host requests.
    with requests.Session() as session:
        response = session.get(url, timeout=timeout)
        if response.status_code != 200:
            logging.error(f"Failed to retrieve index URL: {url}. Status code {response.status_code}")
            return song_urls

        for item_genre in _preview_items(response.text):
            navlink = item_genre.find('a', {'class': 'navlink'})
            if not navlink:
                continue
            href_genre = navlink.get('href')
            if not href_genre:
                # urljoin(url, None) would return the index URL itself and the
                # index page would be scraped again as if it were a genre page.
                continue

            genre = _clean_label(item_genre.text)
            genre_url = urljoin(url, href_genre)

            try:
                response_genre = session.get(genre_url, timeout=timeout)
                response_genre.raise_for_status()
            except requests.RequestException as e:
                logging.error(f"Failed to retrieve genre URL: {genre_url}. Error {e}")
                continue

            item_songs = _preview_items(response_genre.text)
            if not item_songs:
                continue

            # Loop-invariant prefix of the per-genre song ids.
            genre_slug = genre.lower().replace(' ', '_')

            # The file is reopened per genre so finished genres are on disk
            # even if a later page aborts the run.
            with open(csv_path, mode='a', newline='', encoding='utf-8') as file:
                writer = csv.DictWriter(file, fieldnames=fieldnames)

                # In append mode the position starts at the end of the file,
                # so 0 means the file is empty and still needs its header.
                if file.tell() == 0:
                    writer.writeheader()

                for idx, item_song in enumerate(item_songs, start=1):
                    song_url = item_song.get('preview_url')
                    song_urls.append(song_url)

                    song_info = {
                        'song_id': f"{genre_slug}_{idx}",
                        'song_url': song_url,
                        'genre': genre,
                        'artist': _clean_label(item_song.text),
                        # A tag without a title must not abort the whole scrape.
                        'title': item_song.get('title', ''),
                    }
                    try:
                        writer.writerow(song_info)
                    except Exception as e:
                        # Best effort: a row that cannot be written is logged and skipped.
                        logging.error(f"Failed to write song info to CSV. Error: {e}")

    return song_urls

In [115]:
# scrape_song('https://everynoise.com/')

['https://p.scdn.co/mp3-preview/dba15da5409f3c808022cf927c0ff8581f717aa4',
 'https://p.scdn.co/mp3-preview/57c1238d183c40da3157c2892346f58445b1377c',
 'https://p.scdn.co/mp3-preview/7c7b0a5f4f09f82a65fbbfbc17f6ad414d77e837',
 'https://p.scdn.co/mp3-preview/8b46a1ab83d60d36098bbe142b07536dc3788f2b',
 'https://p.scdn.co/mp3-preview/4bd2dc84016f3743add7eea8b988407b1b900672',
 'https://p.scdn.co/mp3-preview/f544992f165d4bd069f4d903ff946dc625ba0b8e',
 'https://p.scdn.co/mp3-preview/667e62a782667b01e98006cd3ba42c9852b62bea',
 'https://p.scdn.co/mp3-preview/6a035bc6ac2041a2edcdde555df93cd2af89abef',
 'https://p.scdn.co/mp3-preview/3c0788c6aba94192edcb497d9a02075bf76c5400',
 'https://p.scdn.co/mp3-preview/4e30857a3c7da3f8891483643e310bb233afadd2',
 'https://p.scdn.co/mp3-preview/93a6049d8bb02d00881a05bb06499c4042a25969',
 'https://p.scdn.co/mp3-preview/10746d0627d5dd428001083030cf6726c5e92c67',
 'https://p.scdn.co/mp3-preview/c0d9119dc69cae75baf6463e21e43f433fdf5ff4',
 'https://p.scdn.co/mp3-p