# 1. Libraries

In [38]:
# 1. Python 
import pandas as pd
import numpy as np

# 2. Spotify API
import requests
import base64
import time
import logging

#logging.basicConfig(level=logging.INFO)

# 3. Spotify OAuth 2.0
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# 2. Access to API 

In [None]:
# 2.1 Load the Spotify API client ID and secret
import os
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment
load_dotenv()

# Credentials are looked up under the env var names 'client_id' / 'client_secret'
client_id = os.getenv('client_id')
client_secret = os.getenv('client_secret')

# Never echo the credential values: notebook output is saved with the file and
# ends up in version control. Report only whether each value was found.
print(f"Spotify Client ID loaded: {client_id is not None}")
print(f"Spotify Client Secret loaded: {client_secret is not None}")

In [None]:
# 2.2 Build the Spotify client from the client ID/secret loaded above
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

# 3. Search and access public playlists by country

In [None]:
# 3.1 Playlist search helper
def search_public_playlists(query, country='US', limit=50):
    """Search Spotify for playlists matching *query*.

    Args:
        query: Search keywords passed to the search call as ``q``.
        country: Market code forwarded to the search call as ``market``.
        limit: Maximum number of results requested in the single search call.

    Returns:
        A list of playlist objects (dicts) from the search response, with
        null placeholder entries removed.
    """
    search_results = sp.search(q=query, type='playlist', limit=limit, market=country)
    items = search_results['playlists']['items']
    # The search response can contain null entries inside ``items``; callers
    # index into every playlist directly, so drop them here.
    return [playlist for playlist in items if playlist is not None]

In [None]:
# 3.2 Example search: playlists matching a keyword in the US market
query = 'EDM'
playlists = search_public_playlists(
    query,
    country='US',
)

In [None]:
# 3.3 Collect one summary row per playlist: name, id, owner, track count
data = []
for playlist in playlists:
    if not playlist:
        # Search results can contain null placeholders; there is nothing to
        # summarise for them and indexing into None would raise TypeError.
        continue
    data.append([playlist['name'], playlist['id'], playlist['owner']['display_name'], playlist['tracks']['total']])

In [None]:
# 3.4 Tabulate the playlist summary rows
df = pd.DataFrame(
    data,
    columns=['Name', 'ID', 'Owner', 'Total Tracks'],
)

df.head()

# 4. Search Tracks in playlists

In [None]:
# 4.1 Fetch every item of a playlist, following pagination

def get_playlist_tracks(playlist_id):
    """Return all items of the playlist *playlist_id*.

    The first page is requested, then each ``next`` link is followed with a
    one-second pause before every additional request. If the Spotify client
    raises ``SpotifyException``, the error is logged and whatever was
    collected so far is returned.
    """
    collected = []
    try:
        page = sp.playlist_tracks(playlist_id)
        while True:
            collected += page['items']
            if not page['next']:
                break
            time.sleep(1)  # pause between pages to stay under the rate limit
            page = sp.next(page)
    except spotipy.exceptions.SpotifyException as e:
        logging.error(f"Error fetching tracks for playlist {playlist_id}: {e}")

    return collected

In [9]:
# 4.2 Fetch audio features and track metadata, merged per track

def get_audio_features_with_info(track_ids):
    """Return one merged record per track in *track_ids*.

    Audio features are requested in slices of 100 ids and track metadata in
    slices of 50 ids, with a one-second pause before every request. If the
    Spotify client raises ``SpotifyException``, the error is logged and the
    merge proceeds with whatever had been fetched up to that point.

    The two result lists are paired positionally; a pair is skipped when
    either side is empty/None.

    Returns:
        A list of dicts holding track metadata followed by audio features.
    """
    feature_keys = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    )
    audio_rows = []
    track_rows = []
    try:
        # Audio features: at most 100 ids per request
        for start in range(0, len(track_ids), 100):
            time.sleep(1)
            audio_rows += sp.audio_features(track_ids[start:start + 100])

        # Track metadata: at most 50 ids per request
        for start in range(0, len(track_ids), 50):
            time.sleep(1)
            track_rows += sp.tracks(track_ids[start:start + 50])['tracks']
    except spotipy.exceptions.SpotifyException as e:
        logging.error(f"Error fetching audio features or track info: {e}")

    merged = []
    for audio, info in zip(audio_rows, track_rows):
        if not (audio and info):
            continue
        album = info['album']
        row = {
            'track_id': info['id'],
            'track_name': info['name'],
            'artist_name': ', '.join(artist['name'] for artist in info['artists']),
            'album_name': album['name'],
            'release_date': album['release_date'],
            'duration_ms': info['duration_ms'],
            'popularity': info['popularity'],
        }
        # Append the audio features after the metadata, in a fixed key order
        for key in feature_keys:
            row[key] = audio[key]
        merged.append(row)
    return merged

In [10]:
# 4.3 Get audio features (with track info) for every track in a playlist

def get_playlist_audio_features(playlist_id):
    """Return merged audio-feature/track records for playlist *playlist_id*.

    Playlist items are fetched with ``get_playlist_tracks``; the ids of their
    tracks are then passed to ``get_audio_features_with_info``.

    Items without a usable track id are skipped: the ``track`` field of a
    playlist item can be null, and a track can have a null ``id``.
    """
    items = get_playlist_tracks(playlist_id)
    track_ids = []
    for item in items:
        # Guard against null items / null ``track`` before indexing into them
        track = item.get('track') if item else None
        if track and track.get('id') is not None:
            track_ids.append(track['id'])
    return get_audio_features_with_info(track_ids)

In [11]:
# 4.5 EDM in US: collect de-duplicated track records across the found playlists

query = 'EDM'
playlists = search_public_playlists(query, country='US')

all_audio_features = []
unique_track_ids = set()  # track ids already added to all_audio_features

for playlist in playlists:
    if not playlist:
        # Search results can contain null placeholders; skip them instead of
        # failing on playlist['id'].
        continue
    playlist_id = playlist['id']
    print(f"Getting audio features for playlist: {playlist['name']}")
    audio_features = get_playlist_audio_features(playlist_id)

    # Keep only the first occurrence of each track across playlists
    for feature in audio_features:
        if feature['track_id'] not in unique_track_ids:
            unique_track_ids.add(feature['track_id'])
            all_audio_features.append(feature)

Getting audio features for playlist: EDM Music 2024 (Top 100)
Getting audio features for playlist: mint
Getting audio features for playlist: EDM House Mix
Getting audio features for playlist: EDM BANGERS ❤️‍🔥🤯🌎
Getting audio features for playlist: Chill Tracks
Getting audio features for playlist: EDM Hard Bass Mix
Getting audio features for playlist: EDM Classics (Top 100)
Getting audio features for playlist: Workout EDM Mix
Getting audio features for playlist: Summer EDM Mix
Getting audio features for playlist: Happy Beats
Getting audio features for playlist: hot girl EDM workout mix 🎧
Getting audio features for playlist: Pop Hits 2000s – 2024
Getting audio features for playlist: Club EDM Mix
Getting audio features for playlist: EDM BANGERS 🔥🎧
Getting audio features for playlist: EDM 2024
Getting audio features for playlist: EDM MIX 2024 🔥 EDM HITS 🔥TOMORROWLAND 2024
Getting audio features for playlist: Ultra Gaming
Getting audio features for playlist: Best EDM Songs of All Time - Mos

In [63]:
# Create a DataFrame from the collected track records and save it as CSV
df = pd.DataFrame(all_audio_features)
csv_filename = '../data/raw//US_TOP_50_EDM_playlists_tracks.csv'
# to_csv does not create missing folders; make sure the target directory exists
os.makedirs(os.path.dirname(csv_filename), exist_ok=True)
df.to_csv(csv_filename, index=False)
print(f"CSV file saved: {csv_filename}")

#display(df.head())

CSV file saved: ../data/raw//US_TOP_50_EDM_playlists_tracks.csv


In [None]:
# 4.7 EDM in DE
# NOTE(review): this cell was titled "EDM in UK" but it queries market 'DE'
# (Germany); the Spotify market code for the United Kingdom is 'GB'.
# Confirm which country was intended.
query = 'EDM'
playlists = search_public_playlists(query, country='DE')

all_audio_features = []
unique_track_ids = set()  # track ids already added to all_audio_features

for playlist in playlists:
    if not playlist:
        # Search results can contain null placeholders; skip them instead of
        # failing on playlist['id'].
        continue
    playlist_id = playlist['id']
    print(f"Getting audio features for playlist: {playlist['name']}")
    audio_features = get_playlist_audio_features(playlist_id)

    # Keep only the first occurrence of each track across playlists
    for feature in audio_features:
        if feature['track_id'] not in unique_track_ids:
            unique_track_ids.add(feature['track_id'])
            all_audio_features.append(feature)


In [None]:
# EDM in NL
# EDM in ES
# EDM in PT
