In [1]:
# import library
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from datetime import datetime
import os
from dotenv import load_dotenv
import json
import time
import requests

print(f"succesful importing all librares")

succesful importing all librares


In [2]:
# Load environment variables before reading the credentials below
# (load_dotenv comes from python-dotenv; presumably it reads a local .env file — confirm).
load_dotenv()

# Build the Spotify client with the client-credentials flow.
# os.getenv returns None when a variable is unset, so a missing
# SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET is passed through as None here.
auth_manager = SpotifyClientCredentials(
    client_id=os.getenv('SPOTIFY_CLIENT_ID'),
    client_secret=os.getenv('SPOTIFY_CLIENT_SECRET')
)
# Module-level client used by get_playlist() and collect_playlist_tracks().
sp = spotipy.Spotify(auth_manager=auth_manager)


## Data extraction

In [34]:
# Get the top-hits playlist for each year
def get_playlist(start_year=2010, end_year=2025, market='CA', limit=5):
    """
    Find one "top hits of <year>" playlist per year through Spotify search.

    For every year in ``start_year..end_year`` (inclusive) the module-level
    ``sp`` client is queried with the keyword ``top hits of <year>``. The first
    search result whose name contains that keyword (case-insensitive) is kept.
    Years without a matching result are simply absent from the returned dict.

    Args:
        start_year (int): first year to search for (inclusive).
        end_year (int): last year to search for (inclusive).
        market (str): market code forwarded to ``sp.search``.
        limit (int): number of search results inspected per year.

    Returns:
        dict: maps ``'top hits of <year>'`` to
        ``{'year': int, 'id': playlist id, 'name': playlist name}``.
    """
    playlists = {}  # keyword -> playlist information for that year

    for year in range(start_year, end_year + 1):
        keyword = f'top hits of {year}'
        results = sp.search(q=keyword, type='playlist', market=market, limit=limit)

        # Tolerate a missing/empty payload instead of raising KeyError/TypeError.
        candidates = ((results or {}).get('playlists') or {}).get('items') or []

        for playlist in candidates:
            # The result list may contain null entries; skip them.
            if playlist is None:
                continue

            if keyword in str(playlist['name']).lower():  # keyword matched
                playlists[keyword] = {
                    'year': year,
                    'id': playlist['id'],
                    'name': playlist['name']
                }
                # only one playlist for one year
                break

    return playlists


# collecting tracks data from playlist 
def collect_playlist_tracks(playlist_id, playlist_name, playlist_year):
    """
    Collect every track of a playlist with detailed information.

    The first page is requested with ``sp.playlist_tracks``; as long as a page
    carries a ``next`` link, the following page is fetched with ``sp.next``,
    so playlists longer than a single page are not truncated.

    Args:
        playlist_id: Spotify playlist id to read.
        playlist_name: playlist name, copied into every record and logged.
        playlist_year: year label copied into every record.

    Returns:
        list[dict]: one record per non-null track, in playlist order.
        ``playlist_rank`` is the 1-based position of the item in the playlist
        (null items still consume a position). Artist and album fields are
        ``None`` when the item has no artists or no album.
    """
    print(f"Collecting tracks form: {playlist_name}")

    page = sp.playlist_tracks(playlist_id=playlist_id, market='CA')

    tracks_data = []  # collection of all tracks in the playlist
    rank = 0          # running position across all pages

    while page:
        for item in page.get('items') or []:
            rank += 1
            track = item.get('track') if item else None
            if track is None:
                continue

            # Take a single clock reading so date and timestamp always agree.
            now = datetime.now()

            # Guard against items without artists or album.
            artists = track.get('artists') or [{}]
            lead_artist = artists[0] or {}
            album = track.get('album') or {}

            tracks_data.append({  # data form of each track
                'year': playlist_year,
                'collection_date': now.strftime('%Y-%m-%d'),
                'collection_timestamp': now.isoformat(),
                'playlist_rank': rank,
                'playlist_name': playlist_name,
                'track_id': track.get('id'),
                'track_name': track.get('name'),
                'artist_id': lead_artist.get('id'),
                'artist_name': lead_artist.get('name'),
                'album_name': album.get('name'),
                'album_release_date': album.get('release_date'),
                'duration_ms': track.get('duration_ms'),
                'popularity': track.get('popularity'),
                'explicit': track.get('explicit')
            })

        # Follow pagination until there is no further page.
        page = sp.next(page) if page.get('next') else None

    return tracks_data

# Convert Spotify track IDs to ReccoBeats track IDs
def get_track_id_recoobeat(track_ids, timeout=30):
    """
    Look up ReccoBeats track records for a list of Spotify track ids.

    The ids are sent to ``https://api.reccobeats.com/v1/track`` in batches of
    39 as repeated ``ids`` query parameters. Each returned entry is keyed by
    the Spotify id taken from the part of its ``href`` after ``"track/"``.

    Args:
        track_ids (list): Spotify track ids.
        timeout (float): seconds to wait for each HTTP request, so a stalled
            connection cannot block the notebook forever.

    Returns:
        dict | None: ``{spotify_id: [reccobeats_id, [artist names]]}``, or
        ``None`` if any request returns a non-200 status or raises.
    """
    url = "https://api.reccobeats.com/v1/track"
    headers = {'Accept': 'application/json'}
    batch = 39  # limit return for each request
    all_id = {}

    for start in range(0, len(track_ids), batch):
        spotify_ids = track_ids[start:start + batch]
        # Repeated 'ids' query parameters, one per Spotify id in the batch.
        params = [('ids', spotify_id) for spotify_id in spotify_ids]

        try:
            response = requests.get(url=url, headers=headers, params=params, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: {response.status_code}")
                return None

            for entry in response.json()['content']:
                # using the Spotify id (tail of the href) as key
                spotify_id = entry['href'].split("track/")[1]
                all_id[spotify_id] = [
                    entry['id'],
                    [artist['name'] for artist in entry['artists']]
                ]

        # Deliberately broad: any network or payload problem aborts the lookup
        # and is reported to the caller as None.
        except Exception as e:
            print(f'Exception occurred: {e}')
            return None

    return all_id

### Collecting audio features for each track
def get_audio_features(recoo_beat_id, timeout=30):
    """
    Get audio features for multiple tracks at once.

    The ids are sent to ``https://api.reccobeats.com/v1/audio-features`` in
    batches of 39 as repeated ``ids`` query parameters, and the ``content``
    list of every response is concatenated.
    Audio features include: danceability, energy, key, loudness, mode,
    speechiness, acousticness, instrumentalness, liveness, valence, tempo.

    Args:
        recoo_beat_id: iterable of ReccoBeats track ids (list, pandas Series, ...).
        timeout (float): seconds to wait for each HTTP request, so a stalled
            connection cannot block the notebook forever.

    Returns:
        list | None: the feature records from all batches, or ``None`` if any
        request returns a non-200 status or raises.
    """
    url = "https://api.reccobeats.com/v1/audio-features"
    headers = {'Accept': 'application/json'}
    batch = 39  # limit return for each request
    feature = []  # all features for all tracks

    # Materialise the input so batching is purely positional, whatever
    # container (or pandas index) the caller passes in.
    all_ids = list(recoo_beat_id)

    for start in range(0, len(all_ids), batch):
        # Repeated 'ids' query parameters, one per ReccoBeats id in the batch.
        params = [('ids', track_uuid) for track_uuid in all_ids[start:start + batch]]

        try:
            response = requests.get(url=url, headers=headers, params=params, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: {response.status_code}")
                return None

            feature.extend(response.json()['content'])

        # Deliberately broad: any network or payload problem aborts the lookup
        # and is reported to the caller as None.
        except Exception as e:
            print(f'Exception occurred: {e}')
            return None

    return feature




In [48]:
def main():
    """
    Run the whole collection pipeline and return the merged dataset.

    Steps:
        1. find the yearly top-hits playlists (``get_playlist``),
        2. collect the tracks of every playlist (``collect_playlist_tracks``),
        3. map Spotify track ids to ReccoBeats ids (``get_track_id_recoobeat``),
        4. fetch the audio features (``get_audio_features``),
        5. left-merge the three tables onto the track table.

    Returns:
        pandas.DataFrame: one row per collected track, with the ReccoBeats id,
        artist list and audio-feature columns joined on. An empty DataFrame is
        returned when no playlist or no track was found.

    Raises:
        RuntimeError: if the ReccoBeats id lookup or the audio-feature lookup
            fails (the helper returned ``None``).
    """
    separator = "=" * 20

    print(separator)
    print("Music trend data colllection")
    print(separator)
    print(f"Collection Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    ### PART I
    ### Collecting playlists data
    print("Finding top hits")
    playlists = get_playlist()  # Collecting the playlist for top hits

    if not playlists:  # nothing matched the keywords
        print("Could not find any playlist")
        # Stop here: the previous bare `exit` was a no-op and let the
        # pipeline run on with no data until it crashed.
        return pd.DataFrame()

    print()
    print(f"Found {len(playlists)} playlists")

    for playlist in playlists.values():
        print(f"{playlist['name']}")

    print('Finishing Collecting all playlists')
    print()

    ### PART II
    ### Collecting tracks from all the playlists
    print(separator)
    print("Collecting tracks data")
    print(separator)

    all_track = []  # records of every track of every playlist
    for playlist_info in playlists.values():  # one playlist per year
        tracks = collect_playlist_tracks(
            playlist_info['id'], playlist_info['name'], playlist_info['year']
        )
        all_track.extend(tracks)

    print("Finishing collecting all tracks")
    print()
    # Converting to dataframe
    print('Coverting tracks dataset to dataframe')
    tracks_df = pd.DataFrame(all_track)
    print('Finishing coverting to dataframe')
    print()

    if tracks_df.empty:
        # Without rows there is no 'track_id' column to look up or merge on.
        print("Could not find any track")
        return tracks_df

    display(tracks_df.head(5))

    ## PART III
    ## Translating Spotify ids to ReccoBeats ids
    print(separator)
    print('Retriving Recoo Beat Ids')
    print(separator)

    # A track may appear in several playlists; look every id up only once.
    spotify_ids = tracks_df['track_id'].dropna().drop_duplicates().tolist()

    recoo_beat_ids = get_track_id_recoobeat(track_ids=spotify_ids)
    if recoo_beat_ids is None:
        raise RuntimeError("ReccoBeats track id lookup failed; see the error printed above")

    recoo_beat_ids_df = (
        pd.DataFrame.from_dict(recoo_beat_ids, orient='index', columns=['UUID', 'Artist'])
        .rename_axis('track_id')
        .reset_index()
    )

    display(recoo_beat_ids_df.head(5))

    ### PART IV
    ### Getting audio features
    print(separator)
    print('Getting all audio feautes')
    print(separator)

    features = get_audio_features(recoo_beat_id=recoo_beat_ids_df['UUID'].tolist())
    if features is None:
        raise RuntimeError("ReccoBeats audio feature lookup failed; see the error printed above")

    feature_df = pd.DataFrame(features)
    if 'id' not in feature_df.columns:
        # No feature came back: keep the join key so the merge below still works.
        feature_df = pd.DataFrame(columns=['id'])

    display(feature_df.head(5))

    ## PART V
    ## Merging all dataframes into one
    print(separator)
    print('Merging Dataframe')
    print(separator)

    # Left joins keep every collected track, even without a ReccoBeats match.
    merged_12 = pd.merge(tracks_df, recoo_beat_ids_df, how='left', on='track_id')
    final_df = pd.merge(merged_12, feature_df, how='left', left_on='UUID', right_on='id')

    print('Finishing merging data')

    return final_df
# Run the collection pipeline; the result is written to CSV in the "Saving data" cell.
collecting_data = main()

Music trend data colllection
Collection Date: 2026-01-05 12:35:53

Finding top hits

Found 14 playlists
Top Hits of 2010
Top Hits of 2011
Top Hits of 2012
Top Hits of 2013
Top Hits of 2014
Top Hits of 2015
Top Hits of 2016
TOP HITS OF 2017
Top Hits of 2018
Top Hits of 2020
Top Hits of 2021
Top Hits of 2022
Top Hits of 2023
Top Hits of 2024
Finishing Collecting all playlists

Collecting tracks data
Collecting tracks form: Top Hits of 2010
Collecting tracks form: Top Hits of 2011
Collecting tracks form: Top Hits of 2012
Collecting tracks form: Top Hits of 2013
Collecting tracks form: Top Hits of 2014
Collecting tracks form: Top Hits of 2015
Collecting tracks form: Top Hits of 2016
Collecting tracks form: TOP HITS OF 2017
Collecting tracks form: Top Hits of 2018
Collecting tracks form: Top Hits of 2020
Collecting tracks form: Top Hits of 2021
Collecting tracks form: Top Hits of 2022
Collecting tracks form: Top Hits of 2023
Collecting tracks form: Top Hits of 2024
Finishing collecting all 

Unnamed: 0,year,collection_date,collection_timestamp,playlist_rank,playlist_name,track_id,track_name,artist_id,artist_name,album_name,album_release_date,duration_ms,popularity,explicit
0,2010,2026-01-05,2026-01-05T12:36:00.882838,1,Top Hits of 2010,2H1047e0oMSj10dgp7p2VG,I Gotta Feeling,1yxSLGMDHlW21z4YXirZDS,Black Eyed Peas,THE E.N.D. (THE ENERGY NEVER DIES),2009-01-01,289133,78,False
1,2010,2026-01-05,2026-01-05T12:36:00.882857,2,Top Hits of 2010,7BqBn9nzAq8spo5e7cZ0dJ,Just the Way You Are,0du5cEVh5yTK9QJze8zA0C,Bruno Mars,Doo-Wops & Hooligans,2010-05-11,220734,86,False
2,2010,2026-01-05,2026-01-05T12:36:00.882866,3,Top Hits of 2010,15JINEqzVMv3SvJTAXAKED,Love The Way You Lie,7dGJo4pcD2V6oG8kP0tJRR,Eminem,Recovery,2010-06-18,263373,88,True
3,2010,2026-01-05,2026-01-05T12:36:00.882874,4,Top Hits of 2010,0SiywuOBRcynK0uKGWdCnn,Bad Romance,1HY2Jd0NmPuamShAr6KMms,Lady Gaga,The Fame Monster (Deluxe Edition),2009-11-05,294573,86,True
4,2010,2026-01-05,2026-01-05T12:36:00.882883,5,Top Hits of 2010,2oENJa1T33GJ0w8dC167G4,Fire Burning,6S0dmVVn4udvppDhZIWxCr,Sean Kingston,Tomorrow,2009-09-07,239986,74,False


Retriving Recoo Beat Ids


Unnamed: 0,track_id,UUID,Artist
0,7HacCTm33hZYYN8DXpCYuG,8b7662af-6bf0-47ba-b88e-c6aae8f5f634,"[Enrique Iglesias, Pitbull]"
1,7BqBn9nzAq8spo5e7cZ0dJ,b184f2a6-786e-4f4f-a38a-ead4c6eacfa3,"[Bruno Mars, Wayne Martin]"
2,6tS3XVuOyu10897O3ae7bi,b49cd6f6-6170-437c-a2b9-0ba7fd5089bb,"[Katy Perry, Snoop Dogg]"
3,7AqISujIaWcY3h5zrOqt5v,dd3e2075-b8f2-49c8-90a0-336014a96456,[CeeLo Green]
4,2rDwdvBma1O1eLzo29p2cr,aedbf62d-725d-4a4f-a4ec-4c6b69bbdf76,[Adam Lambert]


Getting all audio feautes


Unnamed: 0,id,href,isrc,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,valence
0,8b7662af-6bf0-47ba-b88e-c6aae8f5f634,https://open.spotify.com/track/7HacCTm33hZYYN8...,GBUM71003038,0.021,0.648,0.942,0.0,10,0.0594,-2.881,0,0.0878,129.007,0.73
1,b184f2a6-786e-4f4f-a38a-ead4c6eacfa3,https://open.spotify.com/track/7BqBn9nzAq8spo5...,USAT21001269,0.0134,0.635,0.841,0.0,5,0.0622,-5.379,1,0.0422,109.021,0.424
2,b49cd6f6-6170-437c-a2b9-0ba7fd5089bb,https://open.spotify.com/track/6tS3XVuOyu10897...,USCA21001135,0.00452,0.791,0.755,0.0,0,0.163,-3.729,1,0.0568,125.014,0.426
3,89618b76-4880-46c4-9905-89ef8fa4f68d,https://open.spotify.com/track/4AYX69oFP3UOS1C...,USAT21001515,0.0205,0.844,0.601,0.0,1,0.385,-5.283,1,0.157,73.989,0.331
4,9ed7dbb8-e4bf-4c54-8b7d-4f9fa00c9d8a,https://open.spotify.com/track/1a9hBnJodCsNcp0...,GBUM71029333,0.941,0.551,0.197,0.000752,10,0.144,-12.003,1,0.03,134.102,0.404


Merging Dataframe
Finishing merging data


## Saving data 

In [None]:
# Write the collected dataset to a CSV file.
# The output directory can be overridden with the SPOTIFY_DATA_DIR environment
# variable (e.g. in the .env file); it defaults to the original location so
# existing runs keep writing to the same place.
output_dir = os.getenv('SPOTIFY_DATA_DIR', '/Users/tranminu/Documents/spotify-mood-analyze/data')
collecting_data.to_csv(os.path.join(output_dir, 'general_data.csv'))