In [1]:
!pip install tqdm

You should consider upgrading via the '/Users/salimwid/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m


In [2]:
#################
## PREPARATION ##
#################

# Import modules
import sys
# If your authentification script is not in the project directory
# append its folder to sys.path
sys.path.append("../spotify_api_web_app")
import authorization
import pandas as pd
from tqdm import tqdm
import time

# Authorize and call access object "sp"
sp = authorization.authorize()

# Get all genres
genres = sp.recommendation_genre_seeds()

# Set number of recommendations per genre
n_recs = 100

# Initiate a dictionary with all the information you want to crawl
data_dict = {"id":[], "genre":[], "track_name":[], "artist_name":[],
             "valence":[], "energy":[]}

################
## CRAWL DATA ##
################

# Get recs for every genre
for g in tqdm(genres):
    
    # Get n recommendations
    recs = sp.recommendations(genres = [g], limit = n_recs)
    # json-like string to dict
    recs = eval(recs.json().replace("null", "-999").replace("false", "False").replace("true", "True"))["tracks"]
    
    # Crawl data from each track
    for track in recs:
        # ID and Genre
        data_dict["id"].append(track["id"])
        data_dict["genre"].append(g)
        # Metadata
        track_meta = sp.track(track["id"])
        data_dict["track_name"].append(track_meta.name)
        data_dict["artist_name"].append(track_meta.album.artists[0].name)
        # Valence and energy
        track_features = sp.track_audio_features(track["id"])
        data_dict["valence"].append(track_features.valence)
        data_dict["energy"].append(track_features.energy)
        
        # Wait 0.2 seconds per track so that the api doesnt overheat
        time.sleep(0.2)
        
##################
## PROCESS DATA ##
##################

# Store data in dataframe
df = pd.DataFrame(data_dict)

# Drop duplicates
df.drop_duplicates(subset = "id", keep = "first", inplace = True)
df.to_csv("valence_arousal_dataset.csv", index = False)

100%|██████████| 126/126 [1:12:55<00:00, 34.73s/it]


In [3]:
data_dict

{'id': ['2CjMm3TDd9BS8xAcvbe6yY',
  '6bp1twiUg81a04GVGXHIsc',
  '2r07lezrHUPEuhhBYW9JnC',
  '51jy98I9q9Nk1xyA0W4ZBg',
  '574GJ55EBM79W6n2V5bL5z',
  '3guR6HQDbOhJq5r3AcEgBE',
  '6jzWyXK9O7DoNWYRN99NMV',
  '23NPGXlSaIqWzvxIRhM2oG',
  '5sM2GI0vMNZL7tvfPQfByV',
  '2V2pxcEfn5zs79sE7qcewA',
  '2BBb3UMJBNlofpC25pbSp4',
  '4E0lSJ6c5unoku9UIZoJSl',
  '00cP99zN0bsUZSpXbAlQXg',
  '4hbGPfz03P8Nnh75mFIAoH',
  '2kDF8cIxXytMdZq6AmkWSs',
  '4dHEQ1W1jRmNjjB0S7vB8V',
  '423yF55nwTUJqNWXqNeLqJ',
  '6FVuzVYUdbPBJr6kuxgNGs',
  '3i508xSmrzqtAGl6wYPGae',
  '1ZXSLHLp5AJnCp12UXdYiO',
  '3KnQSveB0TCz2Lu3TaJWMJ',
  '2TdzUeMGoQNKZlQcqWqhtK',
  '3LcYYV9ozePfgYYmXv0P3r',
  '4JKHACLkotsxdKNF9vJYOS',
  '4UnbdQoTvXhiKpPCBgW8Tn',
  '3Qf1X9wE43iXD8RLU4zlKR',
  '10ViidwjGLCfVtGPfdcszR',
  '5SdG78xwNRsjXmFGhm9Z8D',
  '2UBRsk5jRJaGkpHtdEmOoe',
  '6m0qBDIhzICOwWMPZdgyVK',
  '0fBSs3fRoh1yJcne77fdu9',
  '3AkxSspcYOvhWTkaMvqyaD',
  '7z7fquRFQFXt4Dj7ouWETq',
  '1VdZ0vKfR5jneCmWIUAMxK',
  '5FPnjikbwlDMULCCCa6ZCJ',
  '3C5in0EVdoG

In [1]:
import pandas as pd
pd.read_csv('valence_arousal_dataset.csv')

Unnamed: 0,id,genre,track_name,artist_name,valence,energy
0,2CjMm3TDd9BS8xAcvbe6yY,acoustic,Let Her Go (feat. Hannah Trigwell),Boyce Avenue,0.307,0.3330
1,6bp1twiUg81a04GVGXHIsc,acoustic,Beach Baby,Bon Iver,0.111,0.0774
2,2r07lezrHUPEuhhBYW9JnC,acoustic,World Spins Madly On,The Shirelles,0.337,0.3610
3,51jy98I9q9Nk1xyA0W4ZBg,acoustic,"If You've Gotta Go, Go Now - Bonus Track",Various Artists,0.325,0.2170
4,574GJ55EBM79W6n2V5bL5z,acoustic,Slow It Down,The Lumineers,0.108,0.0640
...,...,...,...,...,...,...
11139,4AdEId7iTSaADarHjqIXvT,world-music,Oualahila Ar Tesninam,Various Artists,0.752,0.6980
11140,79lLkqFf3SWvmWVMmznKF9,world-music,Ausencia,Cesária Evora,0.427,0.1320
11141,4M6FuBh0zCSLFgoJ2SAWuw,world-music,Jama ko,Bassekou Kouyate,0.904,0.6650
11142,0V0CNmEzYPJRKeabBej6fu,world-music,"Free (Sina Mali, Sina Deni)",Various Artists,0.731,0.6540
