In [1]:
#import Spotify credentials
import sys
from config import *

#import necessary libraries
import requests
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np

# Reachability check only: this fetches the Spotify developer dashboard web page,
# so the printed status code does not prove that the API credentials below work.
# The timeout keeps the cell from hanging indefinitely if the site does not answer.
# NOTE(review): the URL embeds what looks like an app-specific dashboard ID —
# confirm it is acceptable to keep it in a shared notebook.
response = requests.get("https://developer.spotify.com/dashboard/e6694c5fb84f4d1f9ce67605909b7019", timeout=10)


# Initialize SpotiPy with the client credentials
# (Client_ID / Client_Secret are presumably provided by `from config import *`).
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=Client_ID,
                                                           client_secret=Client_Secret))
# Smoke-test the authenticated client with a small search
results = sp.search(q="Daft Punk", limit=5)

#results
print(response.status_code)
# print ('------------------')
# print(results)

200


In [2]:
#import csv files
# NOTE(review): the first path has no extension while the second one ends in '.csv';
# the original inline note said "add .csv" — confirm the actual filename on disk
# before changing the path.
hot_songs = pd.read_csv('hot_songs')
not_hot_songs = pd.read_csv('not_hot_songs.csv') 

In [3]:
#cleaning up columns for future merge

# Rename 'Titles'/'Artists' to 'track_name'/'artists', the column names later
# passed to search_song. rename() returns a new frame, which rebinds hot_songs.
hot_songs = hot_songs.rename(columns={'Titles': 'track_name', 'Artists': 'artists'})
display(hot_songs)

Unnamed: 0,track_name,artists
0,rockin' around the christmas tree,brenda lee
1,all i want for christmas is you,mariah carey
2,jingle bell rock,bobby helms
3,last christmas,wham!
4,a holly jolly christmas,burl ives
...,...,...
95,el amor de su vida,grupo frontera & grupo firme
96,standing next to you,jung kook
97,man made a bar,morgan wallen featuring eric church
98,que onda,calle 24 x chino pacas x fuerza regida


In [4]:
#cleaning up columns for future merge

# Drop the leftover 'index' column. errors='ignore' keeps this cell re-runnable:
# without it, executing the cell a second time raises KeyError because the column
# has already been removed. The redundant axis=1 is omitted, since columns=
# already selects the axis.
not_hot_songs = not_hot_songs.drop(columns=['index'], errors='ignore')
display(not_hot_songs)

Unnamed: 0,track_name,artists
0,rotunda,surgeon
1,nosara,little symphony
2,rat patrol,naked raygun
3,who loves the sun - 2015 remaster,the velvet underground
4,sana değmez,3 hürel
...,...,...
2995,shapes of mystery,tony o'connor
2996,fractures,illenium;nevve
2997,one night standards,ashley mcbryde
2998,fake,lauv;conan gray


In [5]:
#creating a search function

def search_song(df, column_title, column_artist, limit=1, sp_client=None):
    '''
    Look up Spotify track IDs for every (title, artist) row of a DataFrame.

    Parameters:
        df: DataFrame holding the songs to look up.
        column_title: name of the column in df that contains the track title.
        column_artist: name of the column in df that contains the artist(s).
        limit: maximum number of search hits kept per row (forwarded to the
            client's search call).
        sp_client: optional object exposing ``search(q=..., limit=...)``.
            When omitted, the notebook-level ``sp`` client is used.

    Returns:
        A DataFrame with three columns: track_name, artists, id. It contains one
        row per search hit; input rows without any hit are left out.
    '''
    # Reuse the already-configured notebook client instead of building a new one
    # from the placeholder strings 'YOUR_CLIENT_ID' / 'YOUR_CLIENT_SECRET'.
    client = sp if sp_client is None else sp_client

    # Collect plain records and build the DataFrame once at the end; growing a
    # DataFrame with pd.concat inside the loop copies it on every iteration.
    records = []

    for _, row in df.iterrows():
        track_name = row[column_title]
        artists = row[column_artist]

        # Field-filtered query: match on both the track title and the artist
        query = f'track:"{track_name}" artist:"{artists}"'
        results = client.search(q=query, limit=limit)

        for item in results['tracks']['items']:
            records.append({'track_name': track_name,
                            'artists': artists,
                            'id': item['id']})

    # Passing columns= guarantees the expected schema even when nothing matched
    return pd.DataFrame(records, columns=['track_name', 'artists', 'id'])


In [9]:
# Look up Spotify IDs for the hot songs; rows whose title+artist search returns
# no hit are not included in the result.
clean_hot_songs = search_song(hot_songs, 'track_name', 'artists', limit=1)
display(clean_hot_songs)


Unnamed: 0,track_name,artists,id
0,rockin' around the christmas tree,brenda lee,2EjXfH91m7f8HiJN1yQg97
1,all i want for christmas is you,mariah carey,0bYg9bo50gSsH3LtXe2SQn
2,jingle bell rock,bobby helms,7vQbuQcyTflfCIOu3Uzzya
3,last christmas,wham!,2FRnf9qhLbvw8fu4IBXx78
4,a holly jolly christmas,burl ives,77khP2fIVhSW23NwxrRluh
...,...,...,...
64,mind on you,george birge,7rr3gguU3TcVnLZD9VzASE
65,feather,sabrina carpenter,2Zo1PcszsT9WQ0ANntJbID
66,can't catch me now,olivia rodrigo,56xHMIfQPoe0prrSi3BGhf
67,standing next to you,jung kook,2KslE17cAJNHTsI2MI0jb2


In [10]:
# Look up Spotify IDs for the not-hot songs; rows whose title+artist search
# returns no hit are not included in the result.
clean_not_hot_songs = search_song(not_hot_songs, 'track_name', 'artists', limit=1)
display(clean_not_hot_songs)

Unnamed: 0,track_name,artists,id
0,rotunda,surgeon,0ITThluJ75byBK7SdUwR5W
1,nosara,little symphony,2MV2CBFcQa6WPLFMicSvu3
2,rat patrol,naked raygun,77wihvKjsijCbjkGnuAnnn
3,who loves the sun - 2015 remaster,the velvet underground,1rJi8cf8OWsrX4CqBnMSoQ
4,sana değmez,3 hürel,5qHHMrS7MnxayRuf6wXqUV
...,...,...,...
2051,independência,capital inicial,1w2hqo17O8LNsriboNkBI0
2052,millionaire,plastilina mosh,4WfzRSiFd1ZlcIPLYAjLgg
2053,aaj phir kyu,anupam mukherjee,1kOfQVjzwH96tdrz2emKus
2054,one night standards,ashley mcbryde,47qZzTLLJcuD69AIKnquo4


In [11]:
# Convert the 'id' column of both DataFrames to plain lists, the input format
# expected by get_audio_features below.
hot_songs_list = clean_hot_songs['id'].tolist()
not_hot_songs_list = clean_not_hot_songs['id'].tolist()


In [12]:
#this function fetches the audio features based on IDs

import time
from IPython.display import display
import ipywidgets as widgets

def get_audio_features(list_of_song_ids, chunk_size=50, pause_seconds=20,
                       sp_client=None, show_progress=True):
    '''
    Fetch audio features for a list of track IDs, one chunk at a time.

    Parameters:
        list_of_song_ids: list of track IDs to look up.
        chunk_size: number of IDs sent per request (default 50, as before).
        pause_seconds: pause between two consecutive requests, meant to avoid
            rate limiting (default 20, as before). No pause follows the final
            chunk because there is no further request to protect.
        sp_client: optional object exposing ``audio_features(ids)``.
            When omitted, the notebook-level ``sp`` client is used.
        show_progress: when True (default), display an ipywidgets progress bar.

    Returns:
        A DataFrame built from the collected feature records. Chunks whose
        request raised are reported with print() and skipped.

    Raises:
        ValueError: if chunk_size is smaller than 1.
    '''
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    client = sp if sp_client is None else sp_client
    total = len(list_of_song_ids)
    audio_features_list = []  # List to store audio features

    progress_bar = None
    if show_progress:
        progress_bar = widgets.IntProgress(
            min=0,
            max=total,
            description='Processing audio features :',
            bar_style='',
            style={'bar_color': '#1ED760'})
        display(progress_bar)

    for start in range(0, total, chunk_size):
        chunk = list_of_song_ids[start:start + chunk_size]
        # Clamp so the last, shorter chunk does not push the value past `max`
        processed = min(start + chunk_size, total)

        try:
            # Retrieve audio features for the chunk of song IDs
            audio_features = client.audio_features(chunk)
            # The client may return None in place of a record for an ID it has
            # no features for; a None mixed into the list of dicts breaks the
            # DataFrame construction below, so such entries are skipped.
            audio_features_list.extend(
                [features for features in audio_features if features is not None])
        except Exception as e:
            # Deliberate best effort: report the failed chunk and carry on
            print("Error retrieving audio features:", e)

        if progress_bar is not None:
            progress_bar.value = processed  # Update progress bar value

        if processed < total and pause_seconds > 0:
            time.sleep(pause_seconds)  # Sleep to avoid rate limiting

    # Create a DataFrame from the list of audio features
    return pd.DataFrame(audio_features_list)

In [13]:
# Fetch the audio features for the hot-song IDs (author's note: takes about 1 min)
hot_songs_audio_features = get_audio_features(hot_songs_list)

IntProgress(value=0, description='Processing audio features :', max=69, style=ProgressStyle(bar_color='#1ED760…

In [14]:
# Fetch the audio features for the not-hot-song IDs (author's note: takes about 14 min)
not_hot_songs_audio_features = get_audio_features(not_hot_songs_list) 

IntProgress(value=0, description='Processing audio features :', max=2056, style=ProgressStyle(bar_color='#1ED7…

In [15]:
#this function concatenate two dataframes based on the song ID

def add_audio_features(df, audio_features_df):
    '''
    Attach audio-feature columns to a song DataFrame by left-merging on 'id'.

    Parameters:
        df: song DataFrame containing an 'id' column; every row is kept.
        audio_features_df: DataFrame of audio features, keyed by 'id'.

    Returns:
        A new DataFrame with the rows of df plus the feature columns. Rows whose
        id has no feature record get NaN in the feature columns.
    '''
    # A feature frame with no rows and no 'id' column (e.g. built from an empty
    # list) cannot be merged on 'id'; return the songs unchanged in that case.
    if audio_features_df.empty and 'id' not in audio_features_df.columns:
        return df.copy()

    # Keep one feature record per id. If the same id occurs k times on both
    # sides, a plain merge would emit k*k rows for it instead of k.
    unique_features = audio_features_df.drop_duplicates(subset='id')

    return pd.merge(df, unique_features, on='id', how='left')

In [16]:
# Join each cleaned song table with its audio features
# (add_audio_features performs a left merge on the 'id' column).

hot_songs_final = add_audio_features(clean_hot_songs,hot_songs_audio_features)
not_hot_songs_final = add_audio_features(clean_not_hot_songs,not_hot_songs_audio_features)

In [17]:
# Finally, save both cleaned, feature-enriched DataFrames as CSV files.
# NOTE(review): to_csv writes the row index as an extra unnamed first column by
# default; pass index=False if downstream readers should not see it — confirm
# what the consuming notebooks expect before changing this.

hot_songs_final.to_csv('hot_songs_final.csv')
not_hot_songs_final.to_csv('not_hot_songs_final.csv')