In [13]:
# imports
from dotenv import load_dotenv
import os
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from IPython.display import display
import numpy as np

In [14]:
# load env variables
load_dotenv()

# get the client id and secret from the env
client_id = os.getenv('SPOTIPY_CLIENT_ID')
client_secret = os.getenv('SPOTIPY_CLIENT_SECRET')

In [15]:
# set up spotipy with my client credentials. use the sp object to call methods / interact with the api
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

In [16]:
# Set pandas to display longer content without truncation
# pd.set_option('display.max_colwidth', None)  # No limit on column width
# pd.set_option('display.max_rows', None)      # No limit on rows displayed

In [17]:
# format time duration
def format_duration(duration_ms):
    # HH:MM:SS format
    seconds = (duration_ms // 1000) % 60
    minutes = (duration_ms // (1000 * 60)) % 60
    hours = (duration_ms // (1000 * 60 * 60))
    return f"{hours}:{minutes:02}:{seconds:02}"

In [18]:
def get_tracks_for_album(album_id):
    album_tracks = sp.album_tracks(album_id)
    return album_tracks['items']

In [19]:
def get_song_features(track_id):
    song_features = sp.audio_features(track_id)
    return song_features[0] if song_features else None

In [20]:
# this is a list of dicts, and which track it comes from is unspecified
def get_all_audio_features(track_ids):
    features_list = []
    for track_id in track_ids:
        features = get_song_features(track_id)
        if features:
            features_list.append(features)
    return features_list

In [51]:
# function to calculate mean/median/mode of any audio feature
def calculate_average_features(audio_features, feature_list):
    if not audio_features:
        return None
    
    # df from the audio features in the list of dicts provided
    features_df = pd.DataFrame(audio_features)

    display(features_df)

    # the mean for the specified features, while appending "_avg" to the col name using rename and a lamba func
    mean_features = features_df[feature_list].mean().rename(lambda x: f"{x}_avg").to_dict()

    # if i didnt want to change the col name, i'd just do this:
    # mean_features = features_df[feature_list].mean().to_dict()

    # dict of avg values for the specified features
    # print(mean_features)
    return mean_features


In [38]:
# function that takes in album and artist, and outputs the album info
def get_album_info(album_title, artist_name):
    query = f'album:{album_title} artist:{artist_name}' # spotify api syntax for this search query
    result = sp.search(q=query, type='album', limit=1) # returns the album, 1
    
    # print(result)
    if result['albums']['items']:
        # parse JSON to get the first (and only) album for its info
        album = result['albums']['items'][0]
        album_id = album['id']

        # detailed info like genre and popularity
        album_details = sp.album(album['id'])
        print(album_details['popularity'])
        # print(album_details['genres'])

        # Get album tracks
        tracks = get_tracks_for_album(album_id)

        # track duration sum for album duration
        track_durations = [track['duration_ms'] for track in tracks]
        total_duration_ms = sum(track_durations)
        print(format_duration(total_duration_ms))

        # extract details from JSON and put into new dict of info for the album
        album_data = {
            'album_title': album['name'],
            'album_id': album['id'],
            'artist_name': album['artists'][0]['name'], #this is just 1 artist for now, but later can make a list if theres multiple primary artists
            'release_date': album['release_date'],
            'total_tracks': album['total_tracks'],
            'duration_ms': total_duration_ms,
            'duration': format_duration(total_duration_ms),
            'genres': album.get('genres', []), #avoid error if no genres specified
            'popularity': album_details['popularity'],
            'uri': album['uri']
        }
        return album_data
    else:
        return None

In [39]:
def read_csv(file_path):
    return pd.read_csv(file_path)

In [52]:
# Main function
def main():
    # csv file path
    csv_file_path = 'albums.csv'

    # read csv into pandas dataframe
    df = read_csv(csv_file_path)

    # process the df 
    album_data_list = []
    # go through each row of the df (each album) and get the data 
    for index, row in df.iterrows():
        album_title = row['album_title']
        artist_name = row['artist_name']

        album_data = get_album_info(album_title, artist_name)

        # individual track data for the album
        if album_data:
            track_ids = [track['id'] for track in get_tracks_for_album(album_data['album_id'])]
            audio_features = get_all_audio_features(track_ids)
            album_data['audio_features'] = audio_features  # Store audio features in the album data
            # print(album_data['audio_features'])

            # calculate avg audio features (e.g. danceability, energy)
            features_to_average = ['tempo', 'valence', 'danceability', 'energy', 'acousticness', 'liveness', 'loudness','speechiness'] # later, keep track of mode for key and time signature? or need specifics # of a certain key/time sig? will compare at the end across all albums accumulated
            avg_features = calculate_average_features(audio_features, features_to_average) #returns dict of features avg

            # add the average features dict to the album data dict
            album_data.update(avg_features)

            # keep in mind the avg of avgs across other albums isnt the same mathematically, if i were to want to do track data (like finding the mode for the keys etc as i noted above)

            print(f"Album: {album_data['album_title']} by {album_data['artist_name']}")
            for feature, avg_value in avg_features.items():
                print(f"Average {feature.capitalize()}: {avg_value:.2f}")

            # appends album data to the list of albums' datas
            # check/print this
            album_data_list.append(album_data)

    # convert this album data list to a new df with all the info it pulled
    album_data_df = pd.DataFrame(album_data_list)

    # here, we can analyze the data or do more with it
    # print(album_data_df)
    # print(type(album_data_df))
    # print(album_data_df.head())  # Show first few rows to confirm content

    # album_data_df # in jupyter notebook, this shows the df
    # album_data_df.head()  # Shows the first 5 rows

    display(album_data_df)

    # output the average length (calculate)
        # Calculate average album length
    if not album_data_df.empty:
        print(int(album_data_df['duration_ms'].mean()))
        average_length = int(album_data_df['duration_ms'].mean())
        print(f'Average album duration: {format_duration(average_length)} minutes')
    # else:
    #     print('No album data available.')

    # access genre by album title
    # album_title = 'GUTS'
    # first_album_genre = album_data_df[album_data_df['album_title'] == album_title]['genres'].values[0]
    # print(first_album_genre)

    # audio features average
    # print(album_data_df['audio_features'])

# hii

In [53]:
main()

88
0:39:18


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.421,0.695,6,-4.346,1,0.0428,0.218,3e-06,0.155,0.45,156.344,audio_features,34sOdxWu9FljH84UXdRwu1,spotify:track:34sOdxWu9FljH84UXdRwu1,https://api.spotify.com/v1/tracks/34sOdxWu9Flj...,https://api.spotify.com/v1/audio-analysis/34sO...,165834,4
1,0.631,0.881,9,-3.545,1,0.0995,0.00177,5e-06,0.0639,0.808,129.981,audio_features,3IX0yuEVvDbnqUwMBB3ouC,spotify:track:3IX0yuEVvDbnqUwMBB3ouC,https://api.spotify.com/v1/tracks/3IX0yuEVvDbn...,https://api.spotify.com/v1/audio-analysis/3IX0...,184784,4
2,0.484,0.53,5,-5.821,1,0.0538,0.154,0.0,0.263,0.323,136.848,audio_features,1kuGVB7EU95pJObxwvfwKS,spotify:track:1kuGVB7EU95pJObxwvfwKS,https://api.spotify.com/v1/tracks/1kuGVB7EU95p...,https://api.spotify.com/v1/audio-analysis/1kuG...,219724,4
3,0.395,0.367,3,-7.653,1,0.0324,0.815,0.0,0.11,0.426,77.968,audio_features,6QT6j7rKt7Vk3IuV2AUO9W,spotify:track:6QT6j7rKt7Vk3IuV2AUO9W,https://api.spotify.com/v1/tracks/6QT6j7rKt7Vk...,https://api.spotify.com/v1/audio-analysis/6QT6...,177212,4
4,0.351,0.884,1,-3.452,1,0.074,0.0623,0.0,0.335,0.408,120.166,audio_features,5sp71CUt0jXRNqHblPGp7b,spotify:track:5sp71CUt0jXRNqHblPGp7b,https://api.spotify.com/v1/tracks/5sp71CUt0jXR...,https://api.spotify.com/v1/audio-analysis/5sp7...,203369,4
5,0.55,0.409,7,-7.718,1,0.0316,0.386,0.0,0.122,0.252,133.099,audio_features,5CscrLqFBgPfZR8fGtikov,spotify:track:5CscrLqFBgPfZR8fGtikov,https://api.spotify.com/v1/tracks/5CscrLqFBgPf...,https://api.spotify.com/v1/audio-analysis/5Csc...,198867,4
6,0.48,0.24,7,-8.146,1,0.0323,0.844,0.0,0.099,0.152,80.686,audio_features,53dtP2iUMvaF28JZcHnFuU,spotify:track:53dtP2iUMvaF28JZcHnFuU,https://api.spotify.com/v1/tracks/53dtP2iUMvaF...,https://api.spotify.com/v1/audio-analysis/53dt...,231907,4
7,0.553,0.847,5,-5.736,1,0.187,0.0143,0.0,0.561,0.733,162.024,audio_features,2gyxAWHebV7xPYVxqoi86f,spotify:track:2gyxAWHebV7xPYVxqoi86f,https://api.spotify.com/v1/tracks/2gyxAWHebV7x...,https://api.spotify.com/v1/audio-analysis/2gyx...,211141,4
8,0.517,0.821,9,-3.504,1,0.105,0.00341,0.0,0.148,0.617,160.052,audio_features,26QLJMK8G0M06sk7h7Fkse,spotify:track:26QLJMK8G0M06sk7h7Fkse,https://api.spotify.com/v1/tracks/26QLJMK8G0M0...,https://api.spotify.com/v1/audio-analysis/26QL...,154517,4
9,0.511,0.313,10,-9.32,1,0.0846,0.905,0.0,0.0832,0.331,126.962,audio_features,3Nl5OkkmS5DaBZvuYofpAt,spotify:track:3Nl5OkkmS5DaBZvuYofpAt,https://api.spotify.com/v1/tracks/3Nl5OkkmS5Da...,https://api.spotify.com/v1/audio-analysis/3Nl5...,189386,4


Album: GUTS by Olivia Rodrigo
Average Tempo_avg: 128.61
Average Valence_avg: 0.43
Average Danceability_avg: 0.49
Average Energy_avg: 0.59
Average Acousticness_avg: 0.35
Average Liveness_avg: 0.18
Average Loudness_avg: -6.28
Average Speechiness_avg: 0.07
92
0:34:46


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.672,0.526,4,-6.446,1,0.0599,0.0311,1e-06,0.28,0.28,123.999,audio_features,6SRsiMl7w1USE4mFqrOhHC,spotify:track:6SRsiMl7w1USE4mFqrOhHC,https://api.spotify.com/v1/tracks/6SRsiMl7w1US...,https://api.spotify.com/v1/audio-analysis/6SRs...,143747,4
1,0.38,0.339,3,-7.885,1,0.0338,0.691,0.0,0.12,0.0849,100.607,audio_features,5CZ40GBx1sQ9agT82CLQCT,spotify:track:5CZ40GBx1sQ9agT82CLQCT,https://api.spotify.com/v1/tracks/5CZ40GBx1sQ9...,https://api.spotify.com/v1/audio-analysis/5CZ4...,229227,4
2,0.561,0.431,10,-8.81,1,0.0578,0.768,1.4e-05,0.106,0.137,143.875,audio_features,5wANPM4fQCJwkGd4rN57mH,spotify:track:5wANPM4fQCJwkGd4rN57mH,https://api.spotify.com/v1/tracks/5wANPM4fQCJw...,https://api.spotify.com/v1/audio-analysis/5wAN...,242013,4
3,0.473,0.203,2,-12.627,1,0.102,0.941,0.0,0.126,0.419,168.884,audio_features,4wcBRRpIfesgcyUtis7PEg,spotify:track:4wcBRRpIfesgcyUtis7PEg,https://api.spotify.com/v1/tracks/4wcBRRpIfesg...,https://api.spotify.com/v1/audio-analysis/4wcB...,163587,4
4,0.442,0.612,2,-7.222,1,0.112,0.584,6e-06,0.37,0.178,180.917,audio_features,6HU7h9RYOaPRFeh0R3UeAr,spotify:track:6HU7h9RYOaPRFeh0R3UeAr,https://api.spotify.com/v1/tracks/6HU7h9RYOaPR...,https://api.spotify.com/v1/audio-analysis/6HU7...,215507,4
5,0.563,0.664,9,-5.044,1,0.154,0.335,0.0,0.0849,0.688,166.928,audio_features,4ZtFanR9U6ndgddUvNcjcG,spotify:track:4ZtFanR9U6ndgddUvNcjcG,https://api.spotify.com/v1/tracks/4ZtFanR9U6nd...,https://api.spotify.com/v1/audio-analysis/4ZtF...,178147,4
6,0.638,0.259,7,-10.706,1,0.0898,0.753,0.0,0.219,0.269,88.485,audio_features,2TOzTqQXNmR2zDJXihjZ2e,spotify:track:2TOzTqQXNmR2zDJXihjZ2e,https://api.spotify.com/v1/tracks/2TOzTqQXNmR2...,https://api.spotify.com/v1/audio-analysis/2TOz...,202827,4
7,0.395,0.443,6,-9.72,1,0.133,0.765,1e-05,0.0839,0.338,168.924,audio_features,2tGvwE8GcFKwNdAXMnlbfl,spotify:track:2tGvwE8GcFKwNdAXMnlbfl,https://api.spotify.com/v1/tracks/2tGvwE8GcFKw...,https://api.spotify.com/v1/audio-analysis/2tGv...,175933,3
8,0.695,0.575,10,-6.334,0,0.116,0.198,0.0,0.0614,0.699,163.929,audio_features,0MMyJUC3WNnFS1lit5pTjk,spotify:track:0MMyJUC3WNnFS1lit5pTjk,https://api.spotify.com/v1/tracks/0MMyJUC3WNnF...,https://api.spotify.com/v1/audio-analysis/0MMy...,173160,4
9,0.369,0.272,9,-10.497,1,0.0364,0.866,0.0,0.147,0.218,172.929,audio_features,5JCoSi02qi3jJeHdZXMmR8,spotify:track:5JCoSi02qi3jJeHdZXMmR8,https://api.spotify.com/v1/tracks/5JCoSi02qi3j...,https://api.spotify.com/v1/audio-analysis/5JCo...,152667,4


Album: SOUR by Olivia Rodrigo
Average Tempo_avg: 141.52
Average Valence_avg: 0.32
Average Danceability_avg: 0.51
Average Energy_avg: 0.42
Average Acousticness_avg: 0.57
Average Liveness_avg: 0.15
Average Loudness_avg: -8.86
Average Speechiness_avg: 0.09
84
0:41:10


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.729,0.476,8,-4.985,1,0.0363,0.378,0.0,0.12,0.368,62.519,audio_features,39LmTF9RgyakzSYX8txrow,spotify:track:39LmTF9RgyakzSYX8txrow,https://api.spotify.com/v1/tracks/39LmTF9Rgyak...,https://api.spotify.com/v1/audio-analysis/39Lm...,212267,3
1,0.647,0.309,7,-7.948,0,0.0367,0.778,7e-06,0.202,0.195,87.046,audio_features,1TEL6MlSSVLSdhOSddidlJ,spotify:track:1TEL6MlSSVLSdhOSddidlJ,https://api.spotify.com/v1/tracks/1TEL6MlSSVLS...,https://api.spotify.com/v1/audio-analysis/1TEL...,171573,4
2,0.747,0.459,6,-6.891,1,0.306,0.448,0.0,0.252,0.47,75.021,audio_features,4uTvPEr01pjTbZgl7jcKBD,spotify:track:4uTvPEr01pjTbZgl7jcKBD,https://api.spotify.com/v1/tracks/4uTvPEr01pjT...,https://api.spotify.com/v1/audio-analysis/4uTv...,182000,4
3,0.757,0.665,2,-5.194,0,0.0774,0.0815,0.000158,0.216,0.643,151.979,audio_features,2hloaUoRonYssMuqLCBLTX,spotify:track:2hloaUoRonYssMuqLCBLTX,https://api.spotify.com/v1/tracks/2hloaUoRonYs...,https://api.spotify.com/v1/audio-analysis/2hlo...,216893,4
4,0.45,0.619,9,-7.412,1,0.373,0.329,0.0,0.319,0.427,83.221,audio_features,3wFLWP0FcIqHK1wb1CPthQ,spotify:track:3wFLWP0FcIqHK1wb1CPthQ,https://api.spotify.com/v1/tracks/3wFLWP0FcIqH...,https://api.spotify.com/v1/audio-analysis/3wFL...,208880,4
5,0.847,0.579,1,-5.314,1,0.072,0.0268,0.0546,0.102,0.569,137.998,audio_features,5Il6Oe7lr5XM7A0cWbVQtr,spotify:track:5Il6Oe7lr5XM7A0cWbVQtr,https://api.spotify.com/v1/tracks/5Il6Oe7lr5XM...,https://api.spotify.com/v1/audio-analysis/5Il6...,267107,4
6,0.71,0.715,7,-5.371,1,0.192,0.0845,1e-06,0.0617,0.479,105.044,audio_features,27356GVuMPFWiJSZCragoM,spotify:track:27356GVuMPFWiJSZCragoM,https://api.spotify.com/v1/tracks/27356GVuMPFW...,https://api.spotify.com/v1/audio-analysis/2735...,140693,3
7,0.29,0.364,9,-8.295,1,0.0293,0.418,1.9e-05,0.185,0.11,103.684,audio_features,2vdBo4ALPYbHRUPKgtE5iC,spotify:track:2vdBo4ALPYbHRUPKgtE5iC,https://api.spotify.com/v1/tracks/2vdBo4ALPYbH...,https://api.spotify.com/v1/audio-analysis/2vdB...,271467,4
8,0.662,0.6,0,-6.403,1,0.0704,0.173,0.000137,0.109,0.331,134.972,audio_features,4T652DlATVHe0jdLKaN3Bw,spotify:track:4T652DlATVHe0jdLKaN3Bw,https://api.spotify.com/v1/tracks/4T652DlATVHe...,https://api.spotify.com/v1/audio-analysis/4T65...,222947,4
9,0.777,0.317,1,-10.732,0,0.308,0.591,0.0,0.0881,0.33,139.848,audio_features,6ocbgoVGwYJhOv1GgI9NsF,spotify:track:6ocbgoVGwYJhOv1GgI9NsF,https://api.spotify.com/v1/tracks/6ocbgoVGwYJh...,https://api.spotify.com/v1/audio-analysis/6ocb...,178627,4


Album: thank u, next by Ariana Grande
Average Tempo_avg: 113.19
Average Valence_avg: 0.39
Average Danceability_avg: 0.67
Average Energy_avg: 0.53
Average Acousticness_avg: 0.30
Average Liveness_avg: 0.16
Average Loudness_avg: -6.62
Average Speechiness_avg: 0.14


Unnamed: 0,album_title,album_id,artist_name,release_date,total_tracks,duration_ms,duration,genres,popularity,uri,audio_features,tempo_avg,valence_avg,danceability_avg,energy_avg,acousticness_avg,liveness_avg,loudness_avg,speechiness_avg
0,GUTS,1xJHno7SmdVtZAtXbdbDZp,Olivia Rodrigo,2023-09-08,12,2358517,0:39:18,[],88,spotify:album:1xJHno7SmdVtZAtXbdbDZp,"[{'danceability': 0.421, 'energy': 0.695, 'key...",128.606333,0.431833,0.489833,0.5875,0.353434,0.176725,-6.278333,0.068933
1,SOUR,6s84u2TUpR3wdUv4NgKA2j,Olivia Rodrigo,2021-05-21,11,2086368,0:34:46,[],92,spotify:album:6s84u2TUpR3wdUv4NgKA2j,"[{'danceability': 0.672, 'energy': 0.526, 'key...",141.520909,0.316627,0.511909,0.420182,0.574555,0.154655,-8.861091,0.087191
2,"thank u, next",2fYhqwDWXjbpjaIJPEfKFw,Ariana Grande,2019-02-08,12,2470209,0:41:10,[],84,spotify:album:2fYhqwDWXjbpjaIJPEfKFw,"[{'danceability': 0.729, 'energy': 0.476, 'key...",113.192333,0.389,0.671917,0.525917,0.298075,0.15515,-6.622417,0.138108


2305031
Average album duration: 0:38:25 minutes
