In [7]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import spotipy.util as util
import json, os
import config
from tqdm import tqdm

In [8]:
# Publish the Spotify application credentials as SPOTIPY_* environment
# variables. The id and secret come from the local `config` module so they
# are not hard-coded in the notebook; the redirect URI is a fixed localhost
# callback.
os.environ.update({
    'SPOTIPY_CLIENT_ID': config.CLIENT_ID,
    'SPOTIPY_CLIENT_SECRET': config.CLIENT_SECRET,
    'SPOTIPY_REDIRECT_URI': 'http://localhost:8081/callback',
})

In [9]:
# Authenticate as the configured Spotify user and build the API client `sp`.
username = config.USERNAME
scope = 'user-top-read'

# SpotifyOAuth replaces the deprecated `util.prompt_for_user_token` helper.
# It picks up the client id / secret / redirect URI from the SPOTIPY_*
# environment variables, and passing `username` keeps the per-user token
# cache file. Handing the manager to the client (instead of a bare token)
# lets spotipy refresh an expired access token on its own during long runs.
auth_manager = SpotifyOAuth(scope=scope, username=username)

# Request the token eagerly so any interactive authorisation prompt happens in
# this cell; `token` is kept because later cells may still refer to it.
token = auth_manager.get_access_token(as_dict=False)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [10]:
# Collect track search results for every release year from 1960 through 2022.
# Each year is paged through in blocks of PAGE_SIZE up to MAX_RESULTS_PER_YEAR
# hits (NOTE(review): presumably chosen to stay inside the search endpoint's
# paging limits - confirm against the Spotify Web API docs).
PAGE_SIZE = 50
MAX_RESULTS_PER_YEAR = 1000
MAX_ATTEMPTS = 2  # first try + one retry after re-authenticating

tracks = []
for year in tqdm(range(1960, 2023)):
    for offset in range(0, MAX_RESULTS_PER_YEAR, PAGE_SIZE):
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                results = sp.search(q='year:' + str(year), type='track',
                                    limit=PAGE_SIZE, offset=offset)
                tracks.extend(results['tracks']['items'])
            except Exception as exc:
                # `Exception` (not a bare except) so Ctrl-C still interrupts
                # the multi-minute scrape.
                if attempt == MAX_ATTEMPTS:
                    # Stay best-effort, but say which page was lost instead of
                    # dropping it silently.
                    tqdm.write('year={} offset={}: giving up after {} attempts ({!r})'
                               .format(year, offset, MAX_ATTEMPTS, exc))
                    break
                # The usual failure on a long run is an expired token:
                # re-authenticate and retry the SAME page rather than skip it.
                token = util.prompt_for_user_token(username, scope)
                sp = spotipy.Spotify(auth=token)
            else:
                break

100%|█████████████████████████████████████████████████████| 63/63 [11:05<00:00, 10.56s/it]


In [13]:
# Turn the collected track dicts into a table: one row per search hit, one
# column per top-level key. Nested values (album, artists, ...) stay as
# Python objects inside their cells.
df_tracks = pd.DataFrame(data=tracks)

In [14]:
# Checkpoint the raw search results so the slow scrape does not have to be
# repeated. The same two paths are written again by the final save cell.
os.makedirs('data', exist_ok=True)  # a fresh checkout may not have data/ yet
df_tracks.to_parquet('data/raw_data.parquet')
# index=False matches the final save and avoids a spurious "Unnamed: 0"
# column when the CSV is read back.
df_tracks.to_csv('data/raw_data.csv', index=False)

In [15]:
# Lift each track's release date out of its nested album dict into a
# top-level column. Values are kept exactly as stored in the album record
# (the preview below shows both '1960' and '1960-12-31' style strings).
df_tracks['release_date'] = [album['release_date'] for album in df_tracks['album']]
df_tracks.head()

Unnamed: 0,album,artists,available_markets,disc_number,duration_ms,explicit,external_ids,external_urls,href,id,is_local,name,popularity,preview_url,track_number,type,uri,release_date
0,"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,[IN],1,280080,False,{'isrc': 'INH109350700'},{'spotify': 'https://open.spotify.com/track/4j...,https://api.spotify.com/v1/tracks/4jDfPSa6zoZ4...,4jDfPSa6zoZ4xGTSmyrg8i,False,Khoya Khoya Chand Khula Aasman,35,https://p.scdn.co/mp3-preview/1a63edf3b91501be...,8,track,spotify:track:4jDfPSa6zoZ4xGTSmyrg8i,1960-01-01
1,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,[IN],1,169933,False,{'isrc': 'INH100367800'},{'spotify': 'https://open.spotify.com/track/5o...,https://api.spotify.com/v1/tracks/5o4crBlWt8lQ...,5o4crBlWt8lQnLMfPmkzQS,False,Ei Meghla Dine Ekla,39,https://p.scdn.co/mp3-preview/fe7b58cc9fce8008...,3,track,spotify:track:5o4crBlWt8lQnLMfPmkzQS,1960-12-31
2,"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,179693,False,{'isrc': 'USMC16046323'},{'spotify': 'https://open.spotify.com/track/4H...,https://api.spotify.com/v1/tracks/4Hhv2vrOTy89...,4Hhv2vrOTy89HFRcjU3QOx,False,At Last,78,,7,track,spotify:track:4Hhv2vrOTy89HFRcjU3QOx,1960
3,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,[IN],1,423013,False,{'isrc': 'INH100407396'},{'spotify': 'https://open.spotify.com/track/1x...,https://api.spotify.com/v1/tracks/1xjO1qiSWdRo...,1xjO1qiSWdRoK4TXIR6PeJ,False,Majhe Maher Pandhari,35,https://p.scdn.co/mp3-preview/82ba8bafc7bbe0b4...,3,track,spotify:track:1xjO1qiSWdRoK4TXIR6PeJ,1960-08-04
4,"{'album_type': 'compilation', 'artists': [{'ex...",[{'external_urls': {'spotify': 'https://open.s...,[IN],1,163360,False,{'isrc': 'INH100305220'},{'spotify': 'https://open.spotify.com/track/7p...,https://api.spotify.com/v1/tracks/7pCiEvG73gNj...,7pCiEvG73gNjyq6UdIUDYE,False,Dolayam,35,https://p.scdn.co/mp3-preview/21974165673973e4...,5,track,spotify:track:7pCiEvG73gNjyq6UdIUDYE,1960-04-08


In [17]:
# Fetch audio features for every track id, AUDIO_FEATURES_BATCH ids per API
# call, and assemble them into `df_audio_features` (one row per track that has
# features).
AUDIO_FEATURES_BATCH = 100

track_ids = df_tracks['id'].tolist()
audio_feature_records = []
for start in tqdm(range(0, len(track_ids), AUDIO_FEATURES_BATCH)):
    # Positional slicing: does not depend on df_tracks having a 0..n-1 index.
    batch_ids = track_ids[start:start + AUDIO_FEATURES_BATCH]
    try:
        batch_features = sp.audio_features(batch_ids) or []
    except Exception as exc:
        # Stay best-effort (a later cell backfills missing rows), but report
        # the failed batch instead of hiding it.
        tqdm.write('audio_features batch at offset {} failed: {!r}'.format(start, exc))
        continue
    # The API returns None for tracks without features. Drop those entries
    # here so one gap does not discard the other ids in the batch.
    audio_feature_records.extend(item for item in batch_features if item is not None)

if not audio_feature_records:
    # Without at least one record there is no 'id' column to merge on; fail
    # here with a clear message rather than with a KeyError downstream.
    raise RuntimeError('No audio features could be retrieved for any track.')

# Build the frame once from the accumulated records instead of growing it
# with pd.concat inside the loop (quadratic copying).
df_audio_features = pd.DataFrame(audio_feature_records)

100%|███████████████████████████████████████████████████| 630/630 [03:02<00:00,  3.45it/s]


In [18]:
# Attach the audio features to the track table. Left join: every row of
# df_tracks is kept, and tracks without features get NaN in the feature
# columns. Columns present on both sides (duration_ms, type, uri) come out
# with pandas' default _x (track) / _y (audio features) suffixes.
#
# The right-hand side is de-duplicated on 'id' first: if the same track was
# returned by more than one search page, duplicate ids on both sides would
# otherwise multiply rows and `df` would no longer line up with df_tracks.
df = df_tracks.merge(
    df_audio_features.drop_duplicates(subset='id'),
    on='id',
    how='left',
)
df

Unnamed: 0,album,artists,available_markets,disc_number,duration_ms_x,explicit,external_ids,external_urls,href,id,...,instrumentalness,liveness,valence,tempo,type_y,uri_y,track_href,analysis_url,duration_ms_y,time_signature
0,"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,[IN],1,280080,False,{'isrc': 'INH109350700'},{'spotify': 'https://open.spotify.com/track/4j...,https://api.spotify.com/v1/tracks/4jDfPSa6zoZ4...,4jDfPSa6zoZ4xGTSmyrg8i,...,0.000017,0.5080,0.742,81.369,audio_features,spotify:track:4jDfPSa6zoZ4xGTSmyrg8i,https://api.spotify.com/v1/tracks/4jDfPSa6zoZ4...,https://api.spotify.com/v1/audio-analysis/4jDf...,280080,4
1,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,[IN],1,169933,False,{'isrc': 'INH100367800'},{'spotify': 'https://open.spotify.com/track/5o...,https://api.spotify.com/v1/tracks/5o4crBlWt8lQ...,5o4crBlWt8lQnLMfPmkzQS,...,0.588000,0.1130,0.711,115.920,audio_features,spotify:track:5o4crBlWt8lQnLMfPmkzQS,https://api.spotify.com/v1/tracks/5o4crBlWt8lQ...,https://api.spotify.com/v1/audio-analysis/5o4c...,169933,3
2,"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,179693,False,{'isrc': 'USMC16046323'},{'spotify': 'https://open.spotify.com/track/4H...,https://api.spotify.com/v1/tracks/4Hhv2vrOTy89...,4Hhv2vrOTy89HFRcjU3QOx,...,0.013300,0.3340,0.328,87.430,audio_features,spotify:track:4Hhv2vrOTy89HFRcjU3QOx,https://api.spotify.com/v1/tracks/4Hhv2vrOTy89...,https://api.spotify.com/v1/audio-analysis/4Hhv...,179693,3
3,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,[IN],1,423013,False,{'isrc': 'INH100407396'},{'spotify': 'https://open.spotify.com/track/1x...,https://api.spotify.com/v1/tracks/1xjO1qiSWdRo...,1xjO1qiSWdRoK4TXIR6PeJ,...,0.001060,0.0806,0.672,112.476,audio_features,spotify:track:1xjO1qiSWdRoK4TXIR6PeJ,https://api.spotify.com/v1/tracks/1xjO1qiSWdRo...,https://api.spotify.com/v1/audio-analysis/1xjO...,423013,4
4,"{'album_type': 'compilation', 'artists': [{'ex...",[{'external_urls': {'spotify': 'https://open.s...,[IN],1,163360,False,{'isrc': 'INH100305220'},{'spotify': 'https://open.spotify.com/track/7p...,https://api.spotify.com/v1/tracks/7pCiEvG73gNj...,7pCiEvG73gNjyq6UdIUDYE,...,0.000003,0.6030,0.697,171.294,audio_features,spotify:track:7pCiEvG73gNjyq6UdIUDYE,https://api.spotify.com/v1/tracks/7pCiEvG73gNj...,https://api.spotify.com/v1/audio-analysis/7pCi...,163360,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62995,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,224000,False,{'isrc': 'DGA072235824'},{'spotify': 'https://open.spotify.com/track/2t...,https://api.spotify.com/v1/tracks/2tGrx6LISSak...,2tGrx6LISSakaJmU399mau,...,0.000000,0.0749,0.624,135.037,audio_features,spotify:track:2tGrx6LISSakaJmU399mau,https://api.spotify.com/v1/tracks/2tGrx6LISSak...,https://api.spotify.com/v1/audio-analysis/2tGr...,224000,3
62996,"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,212857,False,{'isrc': 'FR10S2274897'},{'spotify': 'https://open.spotify.com/track/6p...,https://api.spotify.com/v1/tracks/6pprRG4zENBw...,6pprRG4zENBwRbIbsGHWRM,...,0.000000,0.0677,0.780,80.408,audio_features,spotify:track:6pprRG4zENBwRbIbsGHWRM,https://api.spotify.com/v1/tracks/6pprRG4zENBw...,https://api.spotify.com/v1/audio-analysis/6ppr...,212857,4
62997,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,200571,False,{'isrc': 'FRX762230302'},{'spotify': 'https://open.spotify.com/track/0F...,https://api.spotify.com/v1/tracks/0F7XXCXum92w...,0F7XXCXum92wXkBhYd25Ds,...,0.000000,0.0988,0.672,139.962,audio_features,spotify:track:0F7XXCXum92wXkBhYd25Ds,https://api.spotify.com/v1/tracks/0F7XXCXum92w...,https://api.spotify.com/v1/audio-analysis/0F7X...,200571,4
62998,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,216000,False,{'isrc': 'FRX452213231'},{'spotify': 'https://open.spotify.com/track/32...,https://api.spotify.com/v1/tracks/326eZ9C7qpDj...,326eZ9C7qpDj6NSsEiKvoW,...,0.000006,0.1830,0.688,144.026,audio_features,spotify:track:326eZ9C7qpDj6NSsEiKvoW,https://api.spotify.com/v1/tracks/326eZ9C7qpDj...,https://api.spotify.com/v1/audio-analysis/326e...,216000,4


In [19]:
# Backfill: for rows that still have no audio features after the batched
# fetch (detected through a null `danceability`), ask the API for that single
# track and write the values into `df` in place.
missing_rows = df.index[df['danceability'].isna()]
for row_label in missing_rows:
    track_id = df.at[row_label, 'id']
    try:
        response = sp.audio_features(track_id)
    except Exception as exc:
        # Best-effort: report and move on, but let Ctrl-C through.
        print('audio_features failed for {}: {!r}'.format(track_id, exc))
        continue

    features = response[0] if response else None
    if features is None:
        # Spotify has no audio features for this track; leave the NaNs.
        continue

    for key, value in features.items():
        if key == 'id':
            continue  # already present as the merge key
        # Write by column NAME instead of by position so the result does not
        # depend on the key order of the API response. Keys that collided with
        # track columns during the merge carry the '_y' suffix.
        column = key + '_y' if key + '_y' in df.columns else key
        if column in df.columns:
            df.at[row_label, column] = value

In [21]:
# Write the merged tracks + audio-features table to disk in both formats.
# NOTE(review): these are the same paths used for the earlier raw dump of
# df_tracks, so that dump is overwritten here - confirm this is intended.
df.to_parquet(path='data/raw_data.parquet')
df.to_csv(path_or_buf='data/raw_data.csv', index=False)