In this notebook, I connect to the Spotify API (via Spotipy) to request data for the band "All Them Witches" and save it as a CSV file that I can analyze later on.

In [1]:
# Basics: import libraries and connect to Spotify API

import json
import time
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import config
# Pull the client ID and secret from the local config module and build the
# Spotipy client that every later cell uses as `sp`.
cid, secret = config.cid, config.secret
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [2]:
# Look the band up by name and keep the ID of the first artist hit.

search_string = "AllThemWitches"
result = sp.search(q=search_string, type="artist")
# The search returns a list of candidate artists; the first entry is used as-is.
top_match = result['artists']['items'][0]
artist_id = top_match['id']
artist_id

'29Wmfm1CojrjQ3aQP0FI65'

In [3]:
# get audio features of their songs

def get_audio_data(artist_id, sp):
    """Collect Spotify audio features for every track on an artist's albums.

    Args:
        artist_id: Spotify ID of the artist whose albums are scanned.
        sp: Client exposing ``artist_albums``, ``album_tracks``,
            ``audio_features`` and ``next`` (e.g. a ``spotipy.Spotify``
            instance).

    Returns:
        pandas.DataFrame with one row per track for which audio features were
        returned, in album / track listing order. Tracks without features are
        left out.
    """
    def _all_items(page):
        # Listing endpoints are paged: follow the "next" links so albums and
        # tracks beyond the first page are not silently dropped.
        items = []
        while page:
            items.extend(page['items'])
            page = sp.next(page) if page.get('next') else None
        return items

    # The audio-features endpoint accepts a limited number of IDs per request.
    max_ids_per_request = 100

    audio_features = []

    for album in _all_items(sp.artist_albums(artist_id)):
        album_tracks = _all_items(sp.album_tracks(album['id']))
        track_ids = [track['id'] for track in album_tracks]

        # An album without tracks yields an empty range, so no request is sent.
        for start in range(0, len(track_ids), max_ids_per_request):
            batch = sp.audio_features(track_ids[start:start + max_ids_per_request])
            # Entries can be None for tracks that have no audio features;
            # skip them so the DataFrame is built from dicts only.
            audio_features.extend(
                features for features in (batch or []) if features is not None
            )

    return pd.DataFrame(audio_features)

# Fetch the audio features for the artist found above.
atw_audio_df = get_audio_data(artist_id=artist_id, sp=sp)

In [4]:
warnings.simplefilter(action='ignore', category=FutureWarning)

# get metadata about their songs

def get_tracks(track_ids, sp, limit=50, time_interval=2):
    """Fetch track metadata in batches and return it as a DataFrame.

    Args:
        track_ids: Iterable of Spotify track IDs (list, pandas Series, ...).
        sp: Client exposing ``tracks(ids)`` that returns a dict with a
            ``'tracks'`` list (e.g. a ``spotipy.Spotify`` instance).
        limit: Maximum number of IDs sent per request.
        time_interval: Seconds to wait between two consecutive requests.

    Returns:
        pandas.DataFrame with one row per returned track, plus the columns
        ``album_release_date`` and ``album_name`` extracted from each track's
        ``album`` entry. If no track is returned, an empty DataFrame that has
        only those two columns.

    Raises:
        ValueError: If ``limit`` is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be a positive integer")

    # Work on a plain list so slicing behaves the same for any iterable input.
    track_ids = list(track_ids)
    derived_columns = ['album_release_date', 'album_name']

    combined_results = []

    for start in range(0, len(track_ids), limit):
        if start:
            # Pause between requests only; no wait is needed after the last one.
            time.sleep(time_interval)
        track_info = sp.tracks(track_ids[start:start + limit])
        # Entries can be None for IDs the API could not resolve; drop them so
        # the album lookups below do not fail.
        combined_results += [track for track in track_info['tracks'] if track is not None]

    if not combined_results:
        # Nothing to describe (empty input or no resolvable IDs).
        return pd.DataFrame(columns=derived_columns)

    track_popularity_df = pd.DataFrame(combined_results)
    track_popularity_df['album_release_date'] = track_popularity_df['album'].apply(lambda x: x['release_date'])
    track_popularity_df['album_name'] = track_popularity_df['album'].apply(lambda x: x['name'])
    return track_popularity_df

# Series.transpose() returns the Series unchanged, so it is dropped here; the
# IDs are handed over as a plain list instead.
track_ids = atw_audio_df['id'].tolist()

combined_results_df = get_tracks(track_ids, sp)
# Keep only the descriptive columns that are joined onto the audio features.
popularity_score_df = combined_results_df[['id', 'name', 'popularity', 'explicit', 'uri', 'album_name', 'album_release_date']]

In [5]:
# merge the data and make some adjustments

# Inner join on the track ID: only tracks present in both tables are kept.
# Both inputs carry a 'uri' column, so it shows up twice in the result with
# the default suffixes (uri_x / uri_y).
songs = atw_audio_df.merge(popularity_score_df, on='id', how='inner')

In [6]:
# Preview the first five rows of the merged table.
songs.head(5)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,track_href,analysis_url,duration_ms,time_signature,name,popularity,explicit,uri_y,album_name,album_release_date
0,0.277,0.675,2,-8.831,1,0.0446,0.00178,0.603,0.151,0.0392,...,https://api.spotify.com/v1/tracks/29JJH9fvsUPR...,https://api.spotify.com/v1/audio-analysis/29JJ...,583278,4,Blood And Sand / Milk And Endless Waters - Live,24,False,spotify:track:29JJH9fvsUPRObLXsK6G1H,Live On The Internet,2022-03-18
1,0.285,0.567,5,-8.211,1,0.0307,0.00173,0.126,0.175,0.132,...,https://api.spotify.com/v1/tracks/0cMsXAtOu1x9...,https://api.spotify.com/v1/audio-analysis/0cMs...,244609,3,Dirt Preachers - Live,22,False,spotify:track:0cMsXAtOu1x9Ckoac6FIF8,Live On The Internet,2022-03-18
2,0.441,0.795,7,-8.601,0,0.0407,0.00464,0.601,0.124,0.172,...,https://api.spotify.com/v1/tracks/3LuWevrwAj9e...,https://api.spotify.com/v1/audio-analysis/3LuW...,300513,4,Saturnine & Iron Jaw - Live,23,False,spotify:track:3LuWevrwAj9ei0Y6DgwP5a,Live On The Internet,2022-03-18
3,0.418,0.59,0,-8.548,0,0.0316,0.0126,0.663,0.109,0.286,...,https://api.spotify.com/v1/tracks/7cSAoErMPkB1...,https://api.spotify.com/v1/audio-analysis/7cSA...,332148,4,41 - Live,22,False,spotify:track:7cSAoErMPkB1gpfs8JBH19,Live On The Internet,2022-03-18
4,0.254,0.702,5,-7.531,1,0.0417,0.000408,0.107,0.0838,0.396,...,https://api.spotify.com/v1/tracks/1jZ4ddGpd67s...,https://api.spotify.com/v1/audio-analysis/1jZ4...,221170,4,When God Comes Back - Live,21,False,spotify:track:1jZ4ddGpd67sZe9tktngZ5,Live On The Internet,2022-03-18


In [7]:
# Write the merged table to disk. The output folder is created first so the
# cell also works on a fresh checkout that has no data/ directory yet.
output_path = Path('data/AllThemWitches.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
songs.to_csv(output_path, index=False)

Outlook: At some point I'd like to add more interesting information about the tracks. I've found more things I'd be interested in [here](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis).

```
# This function retrieves the detailed audio analysis for every track on the artist's albums, including timing information like when fade-ins and fade-outs occur.

def get_analysis_data(artist_id, sp):
    """Collect the audio analysis of every track on an artist's albums.

    Args:
        artist_id: Spotify ID of the artist whose albums are scanned.
        sp: Client exposing ``artist_albums``, ``album_tracks``,
            ``audio_analysis`` and ``next`` (e.g. a ``spotipy.Spotify``
            instance).

    Returns:
        pandas.DataFrame with one row per analysed track, in album / track
        listing order.
    """
    # NOTE(review): the rows hold only what audio_analysis returns; if that
    # payload has no track ID, the table cannot be joined to the other
    # track tables - confirm and add the ID if needed.
    def _all_items(page):
        # Listing endpoints are paged: follow the "next" links so albums and
        # tracks beyond the first page are not silently dropped.
        items = []
        while page:
            items.extend(page['items'])
            page = sp.next(page) if page.get('next') else None
        return items

    analysis_features = []

    for album in _all_items(sp.artist_albums(artist_id)):
        # One request per track: the analysis is fetched a single ID at a time.
        for track in _all_items(sp.album_tracks(album['id'])):
            analysis_features.append(sp.audio_analysis(track['id']))

    return pd.DataFrame(analysis_features)

# Run the analysis download for the artist found above.
atw_analysis_df = get_analysis_data(artist_id=artist_id, sp=sp)

```