In [1]:
# import library
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from datetime import datetime
import os
from dotenv import load_dotenv
import json
import time
import requests

print(f"succesful importing all librares")

succesful importing all librares


**II) Data Conecting**

In [2]:
load_dotenv()

# access to spotify ID
auth_manager = SpotifyClientCredentials(
    client_id=os.getenv('SPOTIFY_CLIENT_ID'),
    client_secret=os.getenv('SPOTIFY_CLIENT_SECRET')
)
sp = spotipy.Spotify(auth_manager=auth_manager)


Data extracting

In [None]:
# get all playist 
def get_playlist():
    """
    Find official hits from 2010 to 2025 

    """

    playlists = {} # list of playlists information from years 

    # collecting the data fronm 2010 to 2025
    for year in range(2010, 2026):
        # searching for keyword
        results = sp.search(q = f'top hits of {year}', type = 'playlist', market = 'CA', limit = 5)

        for playlist in results['playlists']['items']:

            if playlist != None:  
                if f'top hits of {year}' in str(playlist['name']).lower():# key word matched 
                    # add the track for the playlist 
                    playlists[f'top hits of {year}'] ={
                        'year': year,
                        'id': playlist['id'],
                        'name' : playlist['name']
                    }

                    # only one playlist for one year 
                    break 

    

    return playlists


# collecting tracks data from playlist 
def collect_playlist_tracks(playlist_id, playlist_name, playlist_year):
    """
    Collects all tracks from a playlist with detailed information 

    """
    print(f"Collecting tracks form: {playlist_name}")


    results = sp.playlist_tracks(playlist_id= playlist_id, market='CA')

    tracks_data=[]# collection of alll tracks in playlist 

    # accessing to each track in playlist 
    for idx, item in enumerate(results['items'], 1):
        if item['track'] is None:
            continue

        track = item['track']
        
        track_info = { # data form of each track 
            'year' : playlist_year,
            'collection_date': datetime.now().strftime('%Y-%m-%d'),
            'collection_timestamp': datetime.now().isoformat(),
            'playlist_rank': idx,
            'playlist_name': playlist_name,
            'track_id': track['id'],
            'track_name': track['name'],
            'artist_id': track['artists'][0]['id'],
            'artist_name': track['artists'][0]['name'],
            'album_name': track['album']['name'],
            'album_release_date': track['album']['release_date'],
            'duration_ms': track['duration_ms'],
            'popularity': track['popularity'],
            'explicit': track['explicit']
        }
        
        tracks_data.append(track_info)
    
    return tracks_data

# Turning spotify track ids to recoo beat track_id 
def get_track_id_recoobeat(track_ids):

    url = "https://api.reccobeats.com/v1/track?"
    headers = {'Accept': 'application/json'}  # Fixed typo
    batch = 39  # limit return for each request
    all_id = {}

    for i in range(0, len(track_ids), batch):
        spotify_ids = track_ids[i:batch+i]
        params = [('ids', id) for id in spotify_ids]  # end points for spotify_ids

        try:
            response = requests.get(url=url, headers=headers, params=params)

            if response.status_code == 200:
                data = response.json()
                content = data['content']
                
                for j in range(0, len(content)):  # Changed 'i' to 'j'
                    
                    # using spotify_Id as key 
                    all_id[content[j]['href'].split("track/")[1]] = {
                        'id': content[j]['id'],
                        'artists': [a['name'] for a in content[j]['artists']]
                    }
                
            else:
                print(f"Error: {response.status_code}")
                return None 

        except Exception as e:
            print(f'Exception occurred: {e}')
            return None 

    return all_id

### Collecting audio features for each track
def get_audio_features(recoo_beat_id, spotify_id):
    """
    Get audio features for multiple tracks at once
    Audio features include: danceability, energy, key, loudness, mode, 
    speechiness, acousticness, instrumentalness, liveness, valence, tempo
    """
    
    track_features = []
    try :


        url = f"https://api.reccobeats.com/v1/track/{recoo_beat_id}/audio-features"

        payload = {}
        headers = {
        'Accept': 'application/json'
        }

        response = requests.request("GET", url, headers=headers, data=payload)

        if response.status_code ==200:
            data = response.json()

            # add the corresponding spotify ID
            data['spotify_id'] = spotify_id

            track_features.append(data)

            return track_features
        
        else: 
            print(f"Error url request: {response.status_code}")
    except Exception as e:
        print(f"Error occured: {e}")

        return None 


In [66]:
def main():
    """
    Main function for Collecting all the data 
    By running this code, remove all the dataset before and retracking the data 

    """

    print(f"=" * 20)
    print(f"Music trend data colllection")
    print(f'=' * 20)
    print(f"Collection Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()



    ### PART I
    ### Collecting playlists data 
    print("Finding top hits")
    playlists = get_playlist() # Collecting the playlist for top hits 

    if not playlists: # if nothing match the keywords
        print("Could not find any playlist")
        exit

    
    print()
    print(f"Found {len(playlists)} playlists")

    for key, playlist in playlists.items():
        print(f"{playlist['name']}")

    print('Finishing Collecting all playlists')
    print()



    ### PART II
    ### Collecting tracks from all the playlists 
    print(f'=' *20)
    print("Collecting tracks data")
    print(f'='* 20)
    # all tracks data 
    all_track = []

    for playlist_type, playlist_info in playlists.items():# Accessing to the playlist of each years 

        tracks = collect_playlist_tracks(playlist_info['id'], playlist_info['name'], playlist_info['year'])# playlist detail 
        all_track.extend(tracks)

    print("Finishing collecting all tracks")
    print()
    # Coverting to dataframe
    print(f'Coverting tracks dataset to dataframe')
    tracks_df = pd.DataFrame(all_track)
    print('Finishing coverting to dataframe')
    print()

    ## PART III
    ## Transfering spotifyID to Recoo BeatID 
    print(f'='*20)
    print('Transfering to Recoo Beat')
    print(f'='*20)

    spotify_ids = tracks_df['track_id'].tolist()# list of spotify_id for every track

    recoo_beat_ids = get_track_id_recoobeat(track_ids= spotify_ids)

    ### Part IV
    ### getting audio features 
    print(f'='*20)
    print('Transfering to Recoo Beat')
    print(f'='*20)


    all_features =[]

    for spotify_id in recoo_beat_ids.keys():

        track_feature = get_audio_features(recoo_beat_id= recoo_beat_ids[spotify_id]['id'], spotify_id = spotify_id)

        all_features.extend(track_feature)

        break

    display(all_features)
    display(all_track)


    return None
collecting_data = main()

Music trend data colllection
Collection Date: 2025-12-27 13:45:29

Finding top hits

Found 13 playlists
Top Hits of 2011
Top Hits of 2012
Top Hits of 2013
Top Hits of 2014
Top Hits of 2015
Top Hits of 2016
TOP HITS OF 2017
Top Hits of 2018
Top Hits of 2020
Top Hits of 2021
Top Hits of 2022
Top Hits of 2023
Top Hits of 2024
Finishing Collecting all playlists

Collecting tracks data
Collecting tracks form: Top Hits of 2011
Collecting tracks form: Top Hits of 2012
Collecting tracks form: Top Hits of 2013
Collecting tracks form: Top Hits of 2014
Collecting tracks form: Top Hits of 2015
Collecting tracks form: Top Hits of 2016
Collecting tracks form: TOP HITS OF 2017
Collecting tracks form: Top Hits of 2018
Collecting tracks form: Top Hits of 2020
Collecting tracks form: Top Hits of 2021
Collecting tracks form: Top Hits of 2022
Collecting tracks form: Top Hits of 2023
Collecting tracks form: Top Hits of 2024
Finishing collecting all tracks

Coverting tracks dataset to dataframe
Finishing co

[{'id': '3da929a8-c402-43a4-9ed3-69765d6ddf9c',
  'href': 'https://open.spotify.com/track/2U8g9wVcUu9wsg6i7sFSv8',
  'isrc': 'GBAYE1101193',
  'acousticness': 0.00194,
  'danceability': 0.425,
  'energy': 0.732,
  'instrumentalness': 0.0103,
  'key': 9,
  'liveness': 0.171,
  'loudness': -6.883,
  'mode': 1,
  'speechiness': 0.0396,
  'tempo': 117.98,
  'valence': 0.333,
  'spotify_id': '2U8g9wVcUu9wsg6i7sFSv8'}]

[{'year': 2011,
  'collection_date': '2025-12-27',
  'collection_timestamp': '2025-12-27T13:45:38.202860',
  'playlist_rank': 1,
  'playlist_name': 'Top Hits of 2011',
  'track_id': '7Igq29CgfVTTxdGpGT7BP7',
  'track_name': 'Price Tag',
  'artist_id': '2gsggkzM5R49q6jpPvazou',
  'artist_name': 'Jessie J',
  'album_name': 'Who You Are',
  'album_release_date': '2011-01-01',
  'duration_ms': 222533,
  'popularity': 23,
  'explicit': False},
 {'year': 2011,
  'collection_date': '2025-12-27',
  'collection_timestamp': '2025-12-27T13:45:38.202877',
  'playlist_rank': 2,
  'playlist_name': 'Top Hits of 2011',
  'track_id': '20zQZVyUNPbq8kZACdgYrh',
  'track_name': 'On The Floor',
  'artist_id': '2DlGxzQSjYe5N6G9nkYghR',
  'artist_name': 'Jennifer Lopez',
  'album_name': 'LOVE?',
  'album_release_date': '2011-04-29',
  'duration_ms': 284866,
  'popularity': 63,
  'explicit': False},
 {'year': 2011,
  'collection_date': '2025-12-27',
  'collection_timestamp': '2025-12-27T13:45:38.202888',
  'p

In [69]:
import requests

url = "https://api.reccobeats.com/v1/audio-features?ids=3da929a8-c402-43a4-9ed3-69765d6ddf9c"

payload = {}
headers = {
  'Accept': 'application/json'
}

response = requests.request("GET", url, headers=headers, data=payload)

display(response.json()['content'])

[{'id': '3da929a8-c402-43a4-9ed3-69765d6ddf9c',
  'href': 'https://open.spotify.com/track/2U8g9wVcUu9wsg6i7sFSv8',
  'isrc': 'GBAYE1101193',
  'acousticness': 0.00194,
  'danceability': 0.425,
  'energy': 0.732,
  'instrumentalness': 0.0103,
  'key': 9,
  'liveness': 0.171,
  'loudness': -6.883,
  'mode': 1,
  'speechiness': 0.0396,
  'tempo': 117.98,
  'valence': 0.333}]