1. Create a function to search a given **single** song in the Spotify API: **search_song(title, artist, limit)**. 

First, import the necessary libraries.

In [1]:
# Import functionality from other modules

import sys 
from config import *

In [2]:
import json
import time

import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [3]:
# Initialize the Spotipy client with the application credentials.
# NOTE(review): client_id / client_secret are presumably provided by `from config import *` above.

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,
                                                           client_secret=client_secret))

In [4]:
# Load the two song lists from CSV files located in the working directory.
# Both files carry an 'Unnamed: 0' column, which is dropped in the next cell.

not_hot_songs = pd.read_csv('not_hot_songs.csv')
hot_songs = pd.read_csv('hot_songs.csv')

In [5]:
# Drop the leftover 'Unnamed: 0' column from both dataframes.
# Reassigning with errors='ignore' (instead of inplace=True) keeps this cell
# idempotent: re-running it does not raise KeyError once the column is gone.

hot_songs = hot_songs.drop(columns='Unnamed: 0', errors='ignore')
not_hot_songs = not_hot_songs.drop(columns='Unnamed: 0', errors='ignore')

In [6]:
# Show both dataframes, not_hot_songs first and hot_songs second
display(not_hot_songs, hot_songs)

Unnamed: 0,artist,title
0,Queen,Bohemian Rhapsody
1,Danny Vera,Roller Coaster
2,Eagles,Hotel California
3,Billy Joel,Piano Man
4,Coldplay,Fix You
...,...,...
1989,Stevie Wonder,Master Blaster (Jammin')
1990,Neil Young,Harvest
1991,Radiohead,High And Dry
1992,Caro Emerald,A Night Like This


Unnamed: 0,title,artists
0,Rockin' Around The Christmas Tree,Brenda Lee
1,All I Want For Christmas Is You,Mariah Carey
2,Jingle Bell Rock,Bobby Helms
3,Last Christmas,Wham!
4,A Holly Jolly Christmas,Burl Ives
...,...,...
95,El Amor de Su Vida,Grupo Frontera & Grupo Firme
96,Standing Next To You,Jung Kook
97,Man Made A Bar,Morgan Wallen Featuring Eric Church
98,Que Onda,Calle 24 x Chino Pacas x Fuerza Regida


In [7]:
# Rename the "artists" column of the hot_songs dataframe to "artist",
# so that it uses the same column name as not_hot_songs
hot_songs = hot_songs.rename(columns={'artists': 'artist'})
display(hot_songs)

Unnamed: 0,title,artist
0,Rockin' Around The Christmas Tree,Brenda Lee
1,All I Want For Christmas Is You,Mariah Carey
2,Jingle Bell Rock,Bobby Helms
3,Last Christmas,Wham!
4,A Holly Jolly Christmas,Burl Ives
...,...,...
95,El Amor de Su Vida,Grupo Frontera & Grupo Firme
96,Standing Next To You,Jung Kook
97,Man Made A Bar,Morgan Wallen Featuring Eric Church
98,Que Onda,Calle 24 x Chino Pacas x Fuerza Regida


In [8]:
# The hot_songs dataframe is processed first; preview its first five rows

hot_songs.head()

Unnamed: 0,title,artist
0,Rockin' Around The Christmas Tree,Brenda Lee
1,All I Want For Christmas Is You,Mariah Carey
2,Jingle Bell Rock,Bobby Helms
3,Last Christmas,Wham!
4,A Holly Jolly Christmas,Burl Ives


In [9]:
# Create the get_song_ids function

def _lookup_track_id(client, title, artist=None) -> str:
    """
    Return the Spotify ID of the first search hit for a song, or "" when nothing matches.

    A query restricted by track title AND artist is tried first; if it yields no
    result (e.g. the artist text contains "Featuring ..." or several names), the
    lookup falls back to a plain title search.
    """
    queries = []
    # pd.isna() guards against NaN artists, which are truthy floats
    if artist is not None and not pd.isna(artist) and str(artist).strip():
        queries.append(f"track:{title} artist:{artist}")
    queries.append(str(title))

    for query in queries:
        items = client.search(q=query, limit=1)['tracks']['items']
        if items:
            return items[0]['id']
    return ""


def get_song_ids(df: pd.DataFrame, client=None, chunk_size: int = 50,
                 pause_seconds: float = 10) -> list:
    """
    Get the Spotify ID of every song in a dataframe.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a 'title' column. An optional 'artist' column is used to
        narrow the search so that the right version of the song is picked.
    client : optional
        Object exposing `search(q=..., limit=...)` like `spotipy.Spotify`.
        Defaults to the notebook-level `sp` client.
    chunk_size : int
        Number of songs searched between two pauses.
    pause_seconds : float
        Seconds to wait between two chunks, to stay gentle with the API.

    Returns
    -------
    list
        One entry per row of `df`, in row order: the track ID, or "" when the
        song could not be found or the search failed. The list always has
        len(df) entries so it can be aligned with `df` by position.

    Raises
    ------
    KeyError
        If `df` has no 'title' column.
    """
    if 'title' not in df.columns:
        raise KeyError("get_song_ids expects a dataframe with a 'title' column")
    if client is None:
        client = sp  # notebook-level Spotify client

    list_of_ids = []
    total = len(df)

    for start in range(0, total, chunk_size):
        chunk = df.iloc[start:start + chunk_size]

        for _, row in chunk.iterrows():
            title = row['title']
            try:
                song_id = "" if pd.isna(title) else _lookup_track_id(client, title, row.get('artist'))
            except Exception as err:
                # Best effort: keep going, but say what went wrong instead of hiding it
                print(f"Error searching for {title!r} -> {err}")
                song_id = ""
            else:
                if not song_id:
                    print(f"Song not found! ({title!r})")
            list_of_ids.append(song_id)

        # Pause only when another chunk follows
        if start + chunk_size < total:
            print("Sleeping a bit before getting the next ids")
            time.sleep(pause_seconds)

    return list_of_ids

In [10]:
# Run get_song_ids on the hot_songs dataframe; the result is a list of Spotify track IDs

hot_song_id_list = get_song_ids(hot_songs)

Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids


In [11]:
# Display the list of IDs returned for the hot songs

hot_song_id_list

['2EjXfH91m7f8HiJN1yQg97',
 '0bYg9bo50gSsH3LtXe2SQn',
 '7vQbuQcyTflfCIOu3Uzzya',
 '2FRnf9qhLbvw8fu4IBXx78',
 '77khP2fIVhSW23NwxrRluh',
 '5hslUAKq9I9CG2bAulFkHN',
 '2uFaJJtFpPDc5Pa95XzTvg',
 '0oPdaY4dXtc3ZsaG17V972',
 '4xhsWYTOGcal8zt0J161CU',
 '5ASM6Qjiav2xPe7gRkQMsQ',
 '0lizgQ7Qw35od7CYaoMBZb',
 '7uhWhGcR3kn9czLKlbO46q',
 '3YZE5qDV7u1ZD1gZc47ZeR',
 '72Z28IsvEVLjSWdUKEQgZ0',
 '46pF1zFimM582ss1PrMy68',
 '4KV9bM7a1KDc7b7OakFZic',
 '0rHToGels2lt8Y0mCYoF90',
 '3QiAAp20rPC3dcAtKtMaqQ',
 '1BxfuPKGuaTgP7aM0Bbdwr',
 '1vZKP9XURuqMp1SpXGnoyb',
 '7n7VsX3sv66znBwA8b5uhp',
 '2pnPe4pJtq7689i5ydzvJJ',
 '5a1iz510sv2W9Dt1MvFd5R',
 '3rUGC1vUpkDG9CZFHMur1t',
 '75dfH68JDisE8dDaD4KlVY',
 '1SV1fxF65n9NhRHp3KlBuu',
 '5a1iz510sv2W9Dt1MvFd5R',
 '3QIoEi8Enr9uHffwInGIsC',
 '2IGMVunIBsBLtEQyoI1Mu7',
 '6s2wpWPFPAgKg2LXxi1Oee',
 '4KULAymBBJcPRpk1yO4dOG',
 '5aIVCx5tnk0ntmdiinnYvw',
 '5xQskDSiHQeoebxoprn3HL',
 '38xhBO2AKrJnjdjVnhJES6',
 '7dJYggqjKo71KI9sLzqCs8',
 '59uQI0PADDKeE6UZDTJEe8',
 '7xapw9Oy21WpfEcib2ErSA',
 

2. Once the desired song is located, **the function should return the href/id/uri of the song to the code** (not to the user) to get the audio features.

Create a function **get_audio_features(list_of_song_ids)** to obtain the audio features of a given list of songs (`list_of_song_ids` can be a single song href/id/uri or a list of song IDs). 

In [12]:
# Create a function to get the songs audio features

def get_audio_features(list_of_song_ids: list, client=None, chunk_size: int = 50,
                       pause_seconds: float = 10) -> pd.DataFrame:
    """
    Use song IDs to fetch their audio features from Spotify.

    Parameters
    ----------
    list_of_song_ids : list or str
        Song IDs (or URIs/hrefs accepted by the client). A single string is
        treated as a one-element list. Empty entries ("" or None) are allowed.
    client : optional
        Object exposing `audio_features(tracks=[...])` like `spotipy.Spotify`.
        Defaults to the notebook-level `sp` client.
    chunk_size : int
        Number of IDs sent per request.
    pause_seconds : float
        Seconds to wait between two requests.

    Returns
    -------
    pd.DataFrame
        Exactly one row per input ID, in input order. IDs that are empty, have
        no audio features, or belong to a failed request produce a row of
        missing values, so the result stays positionally aligned with the
        input list (and with the dataframe the IDs came from).
    """
    if client is None:
        client = sp  # notebook-level Spotify client
    if isinstance(list_of_song_ids, str):
        # A single ID would otherwise be iterated character by character
        list_of_song_ids = [list_of_song_ids]

    records = []
    total = len(list_of_song_ids)

    for start in range(0, total, chunk_size):
        chunk = list_of_song_ids[start:start + chunk_size]
        # Empty IDs (songs that were not found) must not be sent to the API
        valid_ids = [song_id for song_id in chunk if song_id]
        features_by_id = {}

        if valid_ids:
            try:
                features = client.audio_features(tracks=valid_ids) or []
                # Results come back in request order, with None for unknown tracks
                for song_id, feature in zip(valid_ids, features):
                    if feature:
                        features_by_id[song_id] = feature
            except Exception as err:
                print(f"Error processing tracks ({start} : {start + len(chunk)}) -> {err}")

        # Empty placeholder rows keep one row per requested ID
        records.extend(features_by_id.get(song_id, {}) for song_id in chunk)

        # Pause between requests, but not after the last one
        if start + chunk_size < total:
            print("Sleeping a bit before getting the next features")
            time.sleep(pause_seconds)

    # Build the dataframe once instead of concatenating row by row
    return pd.DataFrame(records)

In [13]:
# Run get_audio_features on the hot-song IDs; the result is a dataframe of audio features

hot_song_feature_df = get_audio_features(hot_song_id_list)

Sleeping a bit before getting the next features


In [14]:
# Review the first rows of the audio-features dataframe

hot_song_feature_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.589,0.472,8,-8.749,1,0.0502,0.614,0.0,0.505,0.898,67.196,audio_features,2EjXfH91m7f8HiJN1yQg97,spotify:track:2EjXfH91m7f8HiJN1yQg97,https://api.spotify.com/v1/tracks/2EjXfH91m7f8...,https://api.spotify.com/v1/audio-analysis/2EjX...,126267,4
1,0.336,0.627,7,-7.463,1,0.0384,0.164,0.0,0.0708,0.35,150.273,audio_features,0bYg9bo50gSsH3LtXe2SQn,spotify:track:0bYg9bo50gSsH3LtXe2SQn,https://api.spotify.com/v1/tracks/0bYg9bo50gSs...,https://api.spotify.com/v1/audio-analysis/0bYg...,241107,4
2,0.754,0.424,2,-8.463,1,0.0363,0.643,0.0,0.0652,0.806,119.705,audio_features,7vQbuQcyTflfCIOu3Uzzya,spotify:track:7vQbuQcyTflfCIOu3Uzzya,https://api.spotify.com/v1/tracks/7vQbuQcyTflf...,https://api.spotify.com/v1/audio-analysis/7vQb...,130973,4
3,0.735,0.478,2,-12.472,1,0.0293,0.189,2e-06,0.355,0.947,107.682,audio_features,2FRnf9qhLbvw8fu4IBXx78,spotify:track:2FRnf9qhLbvw8fu4IBXx78,https://api.spotify.com/v1/tracks/2FRnf9qhLbvw...,https://api.spotify.com/v1/audio-analysis/2FRn...,262960,4
4,0.683,0.375,0,-13.056,1,0.0303,0.579,0.0,0.076,0.888,140.467,audio_features,77khP2fIVhSW23NwxrRluh,spotify:track:77khP2fIVhSW23NwxrRluh,https://api.spotify.com/v1/tracks/77khP2fIVhSW...,https://api.spotify.com/v1/audio-analysis/77kh...,135533,4


3. Once the previous function has been created, create another function **add_audio_features(df, audio_features_df)** to concat a given dataframe with the audio features dataframe and return the extended data frame.

In [15]:
# Create a function to concatenate the hot_song_feature_df to the hot_songs one

def add_audio_features(df, audio_features_df):
    """
    Extend a song dataframe with the columns of an audio-features dataframe.

    The two frames are placed side by side (column-wise concatenation), with
    rows matched on their index labels. A new dataframe is returned.
    """
    frames_side_by_side = [df, audio_features_df]
    return pd.concat(frames_side_by_side, axis="columns")

In [16]:
# Combine the hot_songs dataframe with its audio features

final_hot_songs = add_audio_features(hot_songs, hot_song_feature_df)

In [17]:
# Review the first rows of the concatenated dataframe

final_hot_songs.head()

Unnamed: 0,title,artist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,Rockin' Around The Christmas Tree,Brenda Lee,0.589,0.472,8,-8.749,1,0.0502,0.614,0.0,0.505,0.898,67.196,audio_features,2EjXfH91m7f8HiJN1yQg97,spotify:track:2EjXfH91m7f8HiJN1yQg97,https://api.spotify.com/v1/tracks/2EjXfH91m7f8...,https://api.spotify.com/v1/audio-analysis/2EjX...,126267,4
1,All I Want For Christmas Is You,Mariah Carey,0.336,0.627,7,-7.463,1,0.0384,0.164,0.0,0.0708,0.35,150.273,audio_features,0bYg9bo50gSsH3LtXe2SQn,spotify:track:0bYg9bo50gSsH3LtXe2SQn,https://api.spotify.com/v1/tracks/0bYg9bo50gSs...,https://api.spotify.com/v1/audio-analysis/0bYg...,241107,4
2,Jingle Bell Rock,Bobby Helms,0.754,0.424,2,-8.463,1,0.0363,0.643,0.0,0.0652,0.806,119.705,audio_features,7vQbuQcyTflfCIOu3Uzzya,spotify:track:7vQbuQcyTflfCIOu3Uzzya,https://api.spotify.com/v1/tracks/7vQbuQcyTflf...,https://api.spotify.com/v1/audio-analysis/7vQb...,130973,4
3,Last Christmas,Wham!,0.735,0.478,2,-12.472,1,0.0293,0.189,2e-06,0.355,0.947,107.682,audio_features,2FRnf9qhLbvw8fu4IBXx78,spotify:track:2FRnf9qhLbvw8fu4IBXx78,https://api.spotify.com/v1/tracks/2FRnf9qhLbvw...,https://api.spotify.com/v1/audio-analysis/2FRn...,262960,4
4,A Holly Jolly Christmas,Burl Ives,0.683,0.375,0,-13.056,1,0.0303,0.579,0.0,0.076,0.888,140.467,audio_features,77khP2fIVhSW23NwxrRluh,spotify:track:77khP2fIVhSW23NwxrRluh,https://api.spotify.com/v1/tracks/77khP2fIVhSW...,https://api.spotify.com/v1/audio-analysis/77kh...,135533,4


In [18]:
# Save the final_hot_songs dataframe to a .csv file in the working directory.
# NOTE(review): to_csv writes the index as an unnamed first column by default; this is
# presumably the origin of the 'Unnamed: 0' column dropped earlier. Consider index=False
# once downstream readers no longer expect that column.

final_hot_songs.to_csv('final_hot_songs.csv')

In [19]:
# Repeat the same process for the not_hot_songs dataframe, starting with the song IDs

not_hot_song_id_list = get_song_ids(not_hot_songs)

Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a bit before getting the next ids
Sleeping a 

In [20]:
# The list of IDs is long, so only its length is checked instead of displaying it.
# 1994 entries are expected because 6 songs were removed in the previous lab.

len(not_hot_song_id_list)

1994

In [21]:
# Run get_audio_features() on the not-hot-song IDs; the result is a dataframe of audio features

not_hot_song_feature_df = get_audio_features(not_hot_song_id_list)

Sleeping a bit before getting the next features


In [22]:
# Display the audio-features dataframe of the not-hot songs

display(not_hot_song_feature_df)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.397,0.3860,0,-10.405,0,0.0503,0.27100,0.000000,0.1880,0.210,144.242,audio_features,6l8GvAyoUZwWDgF1e4822w,spotify:track:6l8GvAyoUZwWDgF1e4822w,https://api.spotify.com/v1/tracks/6l8GvAyoUZwW...,https://api.spotify.com/v1/audio-analysis/6l8G...,355400,4
1,0.713,0.8990,7,-3.386,1,0.0584,0.09560,0.000000,0.3870,0.611,120.027,audio_features,1fP8o0lIaSIE5jEeblT8df,spotify:track:1fP8o0lIaSIE5jEeblT8df,https://api.spotify.com/v1/tracks/1fP8o0lIaSIE...,https://api.spotify.com/v1/audio-analysis/1fP8...,179373,4
2,0.579,0.5080,2,-9.484,1,0.0270,0.00574,0.000494,0.0575,0.609,147.125,audio_features,40riOy7x9W7GXjyGp4pjAv,spotify:track:40riOy7x9W7GXjyGp4pjAv,https://api.spotify.com/v1/tracks/40riOy7x9W7G...,https://api.spotify.com/v1/audio-analysis/40ri...,391376,4
3,0.331,0.5500,0,-6.483,1,0.0272,0.60500,0.000004,0.1920,0.429,177.734,audio_features,70C4NyhjD5OZUMzvWZ3njJ,spotify:track:70C4NyhjD5OZUMzvWZ3njJ,https://api.spotify.com/v1/tracks/70C4NyhjD5OZ...,https://api.spotify.com/v1/audio-analysis/70C4...,339000,3
4,0.209,0.4170,3,-8.740,1,0.0338,0.16400,0.001960,0.1130,0.124,138.178,audio_features,7LVHVU3tWfcxj5aiPFEW4Q,spotify:track:7LVHVU3tWfcxj5aiPFEW4Q,https://api.spotify.com/v1/tracks/7LVHVU3tWfcx...,https://api.spotify.com/v1/audio-analysis/7LVH...,295533,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1989,0.877,0.4220,10,-14.933,0,0.0546,0.04000,0.000406,0.1820,0.970,131.103,audio_features,5EAgXGJ8Kw5QAfhQkZXYqT,spotify:track:5EAgXGJ8Kw5QAfhQkZXYqT,https://api.spotify.com/v1/tracks/5EAgXGJ8Kw5Q...,https://api.spotify.com/v1/audio-analysis/5EAg...,307933,4
1990,0.641,0.6770,6,-7.948,1,0.2250,0.49100,0.000077,0.1850,0.503,74.986,audio_features,1ejyx2kXWFY6QuYHu7yOyD,spotify:track:1ejyx2kXWFY6QuYHu7yOyD,https://api.spotify.com/v1/tracks/1ejyx2kXWFY6...,https://api.spotify.com/v1/audio-analysis/1ejy...,136998,4
1991,0.419,0.3830,4,-11.782,1,0.0256,0.07240,0.017600,0.0896,0.350,87.568,audio_features,2a1iMaoWQ5MnvLFBDv4qkf,spotify:track:2a1iMaoWQ5MnvLFBDv4qkf,https://api.spotify.com/v1/tracks/2a1iMaoWQ5Mn...,https://api.spotify.com/v1/audio-analysis/2a1i...,257480,4
1992,0.773,0.0272,7,-25.118,1,0.0708,0.93800,0.862000,0.0967,0.522,107.992,audio_features,7fk9IaVqs4DKkHQvcswJuL,spotify:track:7fk9IaVqs4DKkHQvcswJuL,https://api.spotify.com/v1/tracks/7fk9IaVqs4DK...,https://api.spotify.com/v1/audio-analysis/7fk9...,112932,4


In [23]:
# Combine the not_hot_songs dataframe with its audio features using add_audio_features()

final_not_hot_songs = add_audio_features(not_hot_songs, not_hot_song_feature_df)

In [24]:
# Review the first rows of the concatenated not-hot-songs dataframe

final_not_hot_songs.head()

Unnamed: 0,artist,title,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,Queen,Bohemian Rhapsody,0.397,0.386,0,-10.405,0,0.0503,0.271,0.0,0.188,0.21,144.242,audio_features,6l8GvAyoUZwWDgF1e4822w,spotify:track:6l8GvAyoUZwWDgF1e4822w,https://api.spotify.com/v1/tracks/6l8GvAyoUZwW...,https://api.spotify.com/v1/audio-analysis/6l8G...,355400,4
1,Danny Vera,Roller Coaster,0.713,0.899,7,-3.386,1,0.0584,0.0956,0.0,0.387,0.611,120.027,audio_features,1fP8o0lIaSIE5jEeblT8df,spotify:track:1fP8o0lIaSIE5jEeblT8df,https://api.spotify.com/v1/tracks/1fP8o0lIaSIE...,https://api.spotify.com/v1/audio-analysis/1fP8...,179373,4
2,Eagles,Hotel California,0.579,0.508,2,-9.484,1,0.027,0.00574,0.000494,0.0575,0.609,147.125,audio_features,40riOy7x9W7GXjyGp4pjAv,spotify:track:40riOy7x9W7GXjyGp4pjAv,https://api.spotify.com/v1/tracks/40riOy7x9W7G...,https://api.spotify.com/v1/audio-analysis/40ri...,391376,4
3,Billy Joel,Piano Man,0.331,0.55,0,-6.483,1,0.0272,0.605,4e-06,0.192,0.429,177.734,audio_features,70C4NyhjD5OZUMzvWZ3njJ,spotify:track:70C4NyhjD5OZUMzvWZ3njJ,https://api.spotify.com/v1/tracks/70C4NyhjD5OZ...,https://api.spotify.com/v1/audio-analysis/70C4...,339000,3
4,Coldplay,Fix You,0.209,0.417,3,-8.74,1,0.0338,0.164,0.00196,0.113,0.124,138.178,audio_features,7LVHVU3tWfcxj5aiPFEW4Q,spotify:track:7LVHVU3tWfcxj5aiPFEW4Q,https://api.spotify.com/v1/tracks/7LVHVU3tWfcx...,https://api.spotify.com/v1/audio-analysis/7LVH...,295533,4


In [25]:
# Save the final_not_hot_songs dataframe to a .csv file in the working directory.
# NOTE(review): the index is written as an unnamed first column by default (index=True).

final_not_hot_songs.to_csv('final_not_hot_songs.csv')