In this notebook, I connect to the Spotify API (via Spotipy) to request data for the band "Rival Sons" and save it as a CSV file that I can analyze later on.

In [1]:
# Basics: import libraries and connect to Spotify API

import json
import time
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import config
# The API credentials are read from the local `config` module rather than
# being written into the notebook itself.
cid, secret = config.cid, config.secret

# Build the credentials manager first, then hand it to the Spotipy client
# that every later cell uses as `sp`.
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [2]:
# Get artist ID

# NOTE(review): the band is called "Rival Sons" (with a space). The query is
# left unchanged because it returned an artist id when this notebook was run.
search_string = "RivalSons"
result = sp.search(q=search_string, type="artist")

artist_matches = result['artists']['items']
if not artist_matches:
    # Fail with a readable message instead of a bare "list index out of range".
    raise IndexError(f"Spotify search returned no artist for {search_string!r}")

# Use the first hit of the search response.
artist_id = artist_matches[0]['id']
artist_id

'356c8AN5YWKvz86B4Sb1yf'

In [3]:
# get audio features of their songs

def get_audio_data(artist_id, sp):
    """Collect the audio features of every track on an artist's albums.

    Parameters
    ----------
    artist_id : str
        Spotify id of the artist, passed to ``sp.artist_albums``.
    sp : spotipy.Spotify
        Client used for all requests. It must provide ``artist_albums``,
        ``album_tracks`` and ``audio_features``; ``next`` is only called
        when a response carries a non-empty ``'next'`` entry.

    Returns
    -------
    pandas.DataFrame
        One row per track for which audio features were returned, in the
        order the albums and tracks were listed. Empty if nothing was found.
    """
    audio_features = []

    album_page = sp.artist_albums(artist_id)
    while album_page:
        for album in album_page['items']:
            track_page = sp.album_tracks(album['id'])
            while track_page:
                track_ids = [track['id'] for track in track_page['items']]
                # Skip the request entirely when a page holds no tracks.
                if track_ids:
                    features = sp.audio_features(track_ids) or []
                    # Tracks without features come back as None; dropping them
                    # keeps the DataFrame construction below from failing.
                    audio_features += [entry for entry in features if entry is not None]
                # Follow pagination so tracks beyond the first page are kept.
                track_page = sp.next(track_page) if track_page.get('next') else None
        # Follow pagination so albums beyond the first page are kept.
        album_page = sp.next(album_page) if album_page.get('next') else None

    return pd.DataFrame(audio_features)

# Run the collection for the artist id looked up earlier in this notebook.
rs_audio_df = get_audio_data(artist_id, sp)

In [4]:
# Ignore every FutureWarning from here on.
# NOTE(review): presumably aimed at deprecation notices raised by the pandas/NumPy calls below — confirm.
warnings.simplefilter(action='ignore', category=FutureWarning)

# get metadata about their songs

def get_tracks(track_ids, sp, limit=50, time_interval=2):
    """Fetch track metadata in batches and return it as a DataFrame.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids (a list or a pandas Series both work).
    sp : spotipy.Spotify
        Client whose ``tracks`` method is called once per batch.
    limit : int, default 50
        Maximum number of ids sent per request.
    time_interval : float, default 2
        Seconds to wait between two consecutive requests.

    Returns
    -------
    pandas.DataFrame
        One row per returned track, with the extra columns
        ``album_release_date`` and ``album_name`` pulled out of the nested
        ``album`` entry. Empty (but with those columns) if no track came back.

    Raises
    ------
    ValueError
        If ``limit`` is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be a positive integer")

    # Materialise as a plain list so every batch sent to the client is an
    # ordinary list of ids, whatever container the caller passed in.
    track_ids = list(track_ids)

    combined_results = []

    for start in range(0, len(track_ids), limit):
        # Pause only between requests, never after the last one.
        if start:
            time.sleep(time_interval)
        track_info = sp.tracks(track_ids[start:start + limit])
        # Drop None placeholders so the album lookups below cannot fail on them.
        combined_results += [track for track in track_info['tracks'] if track is not None]

    if not combined_results:
        # Nothing requested or nothing returned: hand back an empty frame that
        # still exposes the derived columns.
        return pd.DataFrame(columns=['album', 'album_release_date', 'album_name'])

    track_popularity_df = pd.DataFrame(combined_results)
    track_popularity_df['album_release_date'] = track_popularity_df['album'].apply(lambda x: x['release_date'])
    track_popularity_df['album_name'] = track_popularity_df['album'].apply(lambda x: x['name'])
    return track_popularity_df

# Request metadata for exactly the tracks we already hold audio features for.
# (Transposing a Series returns the Series itself, so the column is used directly.)
track_ids = rs_audio_df['id']

combined_results_df = get_tracks(track_ids, sp)

# Keep only the metadata fields that are carried into the final dataset.
metadata_columns = [
    'id',
    'name',
    'popularity',
    'explicit',
    'uri',
    'album_name',
    'album_release_date',
]
popularity_score_df = combined_results_df[metadata_columns]

In [5]:
# Merge the audio features with the track metadata on the shared track 'id'.
# Both frames carry a 'uri' column, so the result holds 'uri_x' (from the audio
# features) and 'uri_y' (from the metadata); the inner join keeps only ids
# present in both frames.

songs = pd.merge(rs_audio_df, popularity_score_df, on='id', how='inner')

In [6]:
# Preview the first rows of the merged frame as a quick sanity check.
songs.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,track_href,analysis_url,duration_ms,time_signature,name,popularity,explicit,uri_y,album_name,album_release_date
0,0.27,0.676,0,-7.125,0,0.0478,0.0135,0.0166,0.111,0.17,...,https://api.spotify.com/v1/tracks/4wlv0fS22uw0...,https://api.spotify.com/v1/audio-analysis/4wlv...,538112,4,DARKFIGHTER,39,False,spotify:track:4wlv0fS22uw0KeRAjWCO4H,LIGHTBRINGER,2023-10-20
1,0.386,0.911,4,-3.984,1,0.14,0.0152,0.00088,0.306,0.284,...,https://api.spotify.com/v1/tracks/4wEKLWQxK9xz...,https://api.spotify.com/v1/audio-analysis/4wEK...,266621,4,Mercy,40,False,spotify:track:4wEKLWQxK9xzwYaWQkgzBj,LIGHTBRINGER,2023-10-20
2,0.454,0.596,8,-5.626,0,0.0298,0.0765,0.0,0.092,0.397,...,https://api.spotify.com/v1/tracks/4xgWLAco4NWZ...,https://api.spotify.com/v1/audio-analysis/4xgW...,294920,4,Redemption,40,False,spotify:track:4xgWLAco4NWZfBG4yo0nDL,LIGHTBRINGER,2023-10-20
3,0.553,0.913,8,-4.009,1,0.0979,0.00109,7e-06,0.0259,0.397,...,https://api.spotify.com/v1/tracks/17VMnoDSLyOe...,https://api.spotify.com/v1/audio-analysis/17VM...,213067,4,Sweet Life,44,False,spotify:track:17VMnoDSLyOe5yierXC2y6,LIGHTBRINGER,2023-10-20
4,0.417,0.886,2,-4.654,0,0.0328,0.00104,2.2e-05,0.0498,0.227,...,https://api.spotify.com/v1/tracks/58fexapc8M6Z...,https://api.spotify.com/v1/audio-analysis/58fe...,364520,4,Before The Fire,38,False,spotify:track:58fexapc8M6ZEJdxjbUpMo,LIGHTBRINGER,2023-10-20


In [7]:
# Persist the merged dataset for the later analysis. The target folder is
# created first because writing the CSV fails if the directory is missing.
output_path = Path('data/RivalSons.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
songs.to_csv(output_path, index=False)

Outlook: At some point I'd like to add more interesting information about the tracks. The audio-analysis endpoint documented [here](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis) exposes more fields I'd be interested in.

```
# This function retrieves the detailed audio analysis for every track on the artist's albums, including timing information like when fade-ins and fade-outs occur.

def get_analysis_data(artist_id, sp):
    """Collect the audio analysis of each track on the artist's listed albums.

    Parameters
    ----------
    artist_id : str
        Spotify id of the artist, passed to ``sp.artist_albums``.
    sp : spotipy.Spotify
        Client providing ``artist_albums``, ``album_tracks`` and
        ``audio_analysis``.

    Returns
    -------
    pandas.DataFrame
        One row per analysed track, built from the ``audio_analysis`` responses.
    """
    album_listing = sp.artist_albums(artist_id)

    analyses = []
    for album in album_listing['items']:
        album_track_listing = sp.album_tracks(album['id'])
        # One audio-analysis request per track, in listing order.
        analyses.extend(
            sp.audio_analysis(track['id'])
            for track in album_track_listing['items']
        )

    return pd.DataFrame(analyses)

# Draft usage: this issues one audio-analysis request per track.
rs_analysis_df = get_analysis_data(artist_id, sp)

```