In [33]:
import pandas as pd
import requests
import spotipy
from dotenv import load_dotenv
import os
from spotipy.oauth2 import SpotifyOAuth

In [2]:
load_dotenv()

True

In [3]:
CLIENT_ID = os.getenv('client_id')
CLIENT_SECRET = os.getenv('client_secret')


In [4]:
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    redirect_uri='http://localhost:8888/callback',
    scope="user-library-read"
))

In [5]:
sp

<spotipy.client.Spotify at 0x1fffe15a410>

In [6]:
def get_artists_from_playlist(playlist_id):
    artist_ids = set()
    results = sp.playlist_tracks(playlist_id)
    
    while results:
        for item in results['items']:
            artist = item['track']['artists'][0]
            artist_ids.add(artist['id'])
        
        # Check if there's another page
        results = sp.next(results) if results['next'] else None

    return list(artist_ids)

# Example: Top 50 Global Playlist
top_50_global_playlist_id = '37i9dQZEVXbMDoHDwVN2tF'
artist_ids = get_artists_from_playlist(top_50_global_playlist_id)

In [8]:
def get_related_artists(artist_ids):
    related_artist_ids = set()
    
    for artist_id in artist_ids:
        results = sp.artist_related_artists(artist_id)
        for artist in results['artists']:
            related_artist_ids.add(artist['id'])

    return list(related_artist_ids)

# Expand the list by fetching related artists
all_artist_ids = set(artist_ids)
all_artist_ids.update(get_related_artists(artist_ids))


In [9]:
len(all_artist_ids)

559

In [12]:
genre_seeds = sp.recommendation_genre_seeds()

# Print the list of genres
print("Popular Genres on Spotify:")
for genre in genre_seeds['genres']:
    print(genre)

print(len(genre_seeds['genres']))

Popular Genres on Spotify:
acoustic
afrobeat
alt-rock
alternative
ambient
anime
black-metal
bluegrass
blues
bossanova
brazil
breakbeat
british
cantopop
chicago-house
children
chill
classical
club
comedy
country
dance
dancehall
death-metal
deep-house
detroit-techno
disco
disney
drum-and-bass
dub
dubstep
edm
electro
electronic
emo
folk
forro
french
funk
garage
german
gospel
goth
grindcore
groove
grunge
guitar
happy
hard-rock
hardcore
hardstyle
heavy-metal
hip-hop
holidays
honky-tonk
house
idm
indian
indie
indie-pop
industrial
iranian
j-dance
j-idol
j-pop
j-rock
jazz
k-pop
kids
latin
latino
malay
mandopop
metal
metal-misc
metalcore
minimal-techno
movies
mpb
new-age
new-release
opera
pagode
party
philippines-opm
piano
pop
pop-film
post-dubstep
power-pop
progressive-house
psych-rock
punk
punk-rock
r-n-b
rainy-day
reggae
reggaeton
road-trip
rock
rock-n-roll
rockabilly
romance
sad
salsa
samba
sertanejo
show-tunes
singer-songwriter
ska
sleep
songwriter
soul
soundtracks
spanish
study
summer
swe

In [28]:
import json

def search_artists_by_genre(genre, limit=100):
    artist_ids = set()
    results = sp.search(q=f'genre:{genre}', type='artist', limit=50)
    
    while results and len(artist_ids) < limit:
        for artist in results['artists']['items']:
            artist_ids.add(artist['id'])
        
        # Check if there's another page and update search results
        if results['artists']['next']:
            results = sp.next(results['artists'])
        else:
            results = None

    return list(artist_ids)

for genre in genre_seeds['genres']:
    all_artist_ids.update(search_artists_by_genre(genre, limit=10))


In [29]:
len(all_artist_ids)

4635

In [30]:
all_artist_ids.update(get_related_artists(artist_ids))

In [31]:
len(all_artist_ids)

4635

In [34]:
track_df = pd.DataFrame()


In [52]:
import time
from requests.exceptions import ReadTimeout


def fetch_with_retry(func, *args, **kwargs):
    retries = 3
    for attempt in range(retries):
        try:
            return func(*args, **kwargs)
        except ReadTimeout:
            print(f"Timeout error, retrying... ({attempt + 1}/{retries})")
            time.sleep(2 ** attempt)  # Exponential backoff

def get_artist_tracks(artist_id):
    # Initialize a list to store track data
    tracks_data = []
    
    # Search for the artist's top tracks
    results = sp.artist_top_tracks(artist_id)
    
    for track in results['tracks']:
        track_id = track['id']
        track_name = track['name']
        artist_name = track['artists'][0]['name']
        artist_id = track['artists'][0]['id']
        
        # Fetch track features
        features = fetch_with_retry(sp.audio_features, track_id)[0]
        
        # Append track data
        tracks_data.append({
            'artist_name': artist_name,
            'artist_id': artist_id,
            'track_id': track_id,
            'track_name': track_name,
            'danceability': features['danceability'] if features else None,
            'energy': features['energy'] if features else None,
            'key': features['key'] if features else None,
            'loudness': features['loudness'] if features else None,
            'mode': features['mode'] if features else None,
            'speechiness': features['speechiness'] if features else None,
            'acousticness': features['acousticness'] if features else None,
            'instrumentalness': features['instrumentalness'] if features else None,
            'liveness': features['liveness'] if features else None,
            'valence': features['valence'] if features else None,
            'tempo': features['tempo'] if features else None
        })
    
    return tracks_data

# Example list of artist IDs (replace with your actual artist IDs

# Create DataFrame
tracks_df = pd.DataFrame()
for artist_id in all_artist_ids:
        tracks_data = get_artist_tracks(artist_id)
        tracks_df.append(tracks_data)





# Display the DataFrame
print(tracks_df.head())

Max Retries reached


SpotifyException: http status: 429, code:-1 - /v1/audio-features/?ids=4qArKIA7dnmdushoBlkV94:
 Max Retries, reason: too many 429 error responses

In [51]:
tracks_df

NameError: name 'tracks_df' is not defined