In [None]:
import requests
import os
from base64 import b64encode
import time

# Spotify credentials as (client_id, client_secret) pairs; get_token(index) selects one pair.
# NOTE(review): the "..." entries look like redacted placeholders — confirm real values are supplied before running.
# Earlier approach, left disabled: read a ready-made access token from the environment.
#access_token = os.environ.get("SPOTIFY_ACCESS_TOKEN")
codes = [("...", "...")]
def get_token(index = 0, timeout = 10):
    """Request a Spotify access token using the client-credentials grant.

    Args:
        index: Position in the module-level ``codes`` list of the
            (client_id, client_secret) pair to authenticate with.
        timeout: Seconds to wait on the token endpoint before ``requests``
            raises a timeout error, so a stalled connection cannot hang the
            notebook indefinitely.

    Returns:
        The ``access_token`` string from the response body when the endpoint
        answers with HTTP 200, otherwise ``None``.

    Raises:
        IndexError: If ``index`` is outside ``codes``.
    """
    client_id, client_secret = codes[index]

    # HTTP Basic credentials: base64 of "client_id:client_secret".
    basic_auth = b64encode(bytes(client_id + ':' + client_secret, "utf-8")).decode("utf-8")
    response = requests.post(
        'https://accounts.spotify.com/api/token',
        headers={'Authorization': 'Basic ' + basic_auth},
        # Sent form-encoded. `json=` is deliberately not passed: requests
        # ignores it whenever `data` is non-empty.
        data={"grant_type": 'client_credentials'},
        timeout=timeout,
    )
    if response.status_code == 200:
        return response.json()["access_token"]
    return None


In [None]:
# Fetch an initial access token with the default credentials (index 0); this is None if the token request did not return HTTP 200
access_token = get_token()

def get_data(url, access_token, onSuccess, timeout = 10):
    """GET ``url`` with a bearer token and pass the decoded JSON to ``onSuccess``.

    Retry policy (at most 3 retries, i.e. 4 requests in total):
      * 401 - request a fresh token via ``get_token()`` and retry with it.
        The fresh token is only used inside this call; the caller's token
        is not updated.
      * 429 - sleep 5 seconds and retry with the same token.

    Args:
        url: Full URL to request.
        access_token: Bearer token for the first attempt. ``None`` is accepted
            and yields ``None`` without sending a request.
        onSuccess: Callable receiving the parsed JSON body of a 200 response;
            its return value becomes this function's return value.
        timeout: Seconds to wait on each request before ``requests`` raises
            a timeout error.

    Returns:
        The result of ``onSuccess`` for a 200 response; ``None`` for any other
        final outcome (non-retryable status, retries exhausted, or no usable
        token available).
    """
    max_retries = 3
    token = access_token
    for attempt in range(max_retries + 1):
        # A failed token request yields None; concatenating it into the header
        # would raise TypeError, so report failure the same way as a bad status.
        if token is None:
            return None
        response = requests.get(url, headers={ "Authorization" : "Bearer " + token }, timeout=timeout)
        status = response.status_code
        if status == 200:
            return onSuccess(response.json())
        if status not in (401, 429) or attempt == max_retries:
            return None
        if status == 429:
            print("Rate limit exceeded, waiting 5 seconds")
            time.sleep(5)
        else:
            # 401: perhaps the token is expired, so ask for a new one.
            token = get_token()

def get_audio_analysis(track_id, access_token):
    """Fetch the audio analysis for one track and condense it to a small dict.

    Returns whatever ``get_data`` returns: for a successful response, a dict
    holding the track-level ``key``, ``loudness``, ``tempo`` and ``mode`` plus
    ``number_of_bars`` (the length of the response's ``bars`` list).
    """
    def summarise(analysis):
        track_section = analysis["track"]
        summary = {
            field: track_section[field]
            for field in ("key", "loudness", "tempo", "mode")
        }
        summary["number_of_bars"] = len(analysis["bars"])
        return summary

    endpoint = f"https://api.spotify.com/v1/audio-analysis/{track_id}"
    return get_data(endpoint, access_token, summarise)
    
def get_track_features(track_id, access_token):
    """Fetch the audio features of a single track, keeping six fields.

    Returns whatever ``get_data`` returns: for a successful response, a dict
    with ``key``, ``loudness``, ``tempo``, ``mode``, ``danceability`` and
    ``valence`` copied from the response body.
    """
    kept_fields = ("key", "loudness", "tempo", "mode", "danceability", "valence")

    def pick_fields(payload):
        return {field: payload[field] for field in kept_fields}

    endpoint = f"https://api.spotify.com/v1/audio-features/{track_id}"
    return get_data(endpoint, access_token, pick_fields)

def get_tracks_features(track_ids, access_token):
    """Fetch audio features for several tracks in a single request.

    ``track_ids`` is joined with commas into the ``ids`` query parameter.
    Returns whatever ``get_data`` returns: for a successful response, a list
    with one dict per non-``None`` entry of the response's ``audio_features``
    list, each reduced to the twelve fields named below.
    """
    kept_fields = (
        "uri", "key", "loudness", "tempo", "mode", "danceability", "valence",
        "instrumentalness", "liveness", "acousticness", "energy", "speechiness",
    )

    def pick_all(payload):
        # Entries that are None are skipped.
        return [
            {field: entry[field] for field in kept_fields}
            for entry in payload["audio_features"]
            if entry is not None
        ]

    joined_ids = ",".join(track_ids)
    endpoint = f"https://api.spotify.com/v1/audio-features?ids={joined_ids}"
    return get_data(endpoint, access_token, pick_all)

def get_track(track_id, access_token):
    """Fetch basic metadata for a single track.

    The track id is part of the URL path. ``get_data`` accepts only a URL, a
    token and a callback as positional arguments, so the id must not be
    passed as a separate argument.

    Args:
        track_id: Spotify track id.
        access_token: Bearer token forwarded to ``get_data``.

    Returns:
        Whatever ``get_data`` returns: for a successful response, a dict with
        ``name``, ``artists``, ``album``, ``explicit`` and ``popularity``
        copied from the response body.
    """
    return get_data(
        f"https://api.spotify.com/v1/tracks/{track_id}",
        access_token,
        lambda r: {
            "name": r["name"],
            "artists": r["artists"],
            "album": r["album"],
            "explicit": r["explicit"],
            "popularity": r["popularity"],
        })


In [None]:
import csv


# NOTE(review): not referenced by any other cell visible here — looks like a leftover; confirm before removing.
tracks_ids = []

def read_csv(file):
    """Return the ``uri`` column of a CSV file as a list of strings.

    Args:
        file: Path of a CSV file whose header row includes a ``uri`` column.

    Returns:
        One ``uri`` value per data row, in file order.

    Raises:
        KeyError: If a data row is read from a file whose header has no
            ``uri`` column.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded in quoted fields
    # are translated before the parser sees them.
    with open(file, mode='r', newline='') as csv_file:
        return [row["uri"] for row in csv.DictReader(csv_file)]
    
# Load every track URI, then cut the list into batches of up to 100 bare ids
# (the third ":"-separated component of each URI).
lines = read_csv("output/tracks.csv")
queries = [
    [uri.split(":")[2] for uri in lines[start:start + 100]]
    for start in range(0, len(lines), 100)
]
print(len(queries))
print(queries[0])


In [None]:
import os.path as path
# Index of the first batch to download.
# NOTE(review): 13984 was hard-coded as an offset in this loop, presumably to
# resume an interrupted run — set it to 0 to download every batch.
START_BATCH = 13984
# Column order shared by the header row and every data row.
FEATURE_COLUMNS = ["uri", "key", "loudness", "tempo", "mode", "danceability", "valence", "instrumentalness", "liveness", "acousticness", "energy", "speechiness"]

count = len(lines)
tid = 0
# Walk the real batch indices from START_BATCH onwards, so the offset can
# never index past the end of `queries`.
for index in range(START_BATCH, len(queries)):
    batch = queries[index]
    output_file = path.join("output", f"features_{index*100}-{(index*100)+100}.csv")
    features = get_tracks_features(batch, access_token)
    # NOTE(review): this retries forever while the request keeps failing,
    # including for errors that a new token cannot fix.
    while features is None:
        # Rotate through the credential pairs that actually exist in `codes`.
        tid = (tid + 1) % len(codes)
        access_token = get_token(tid)
        print(f"Token {tid} expired")
        features = get_tracks_features(batch, access_token)
    with open(output_file, "w", newline="") as out_file:
        writer = csv.writer(out_file, delimiter=",", lineterminator="\n")
        writer.writerow(FEATURE_COLUMNS)
        for feature in features:
            writer.writerow([feature[column] for column in FEATURE_COLUMNS])
    print(f"\rDone {(index*100)+100}/{count}", end="")


In [None]:
import csv
import glob
import os.path as path

# Merge every per-batch feature file into one CSV with a single header row.
output_file = path.join("output", "final", "features.csv")
columns = ["uri", "key", "loudness", "tempo", "mode", "danceability", "valence", "instrumentalness", "liveness", "acousticness", "energy", "speechiness"]

# open() does not create missing directories, so make sure output/final exists.
os.makedirs(path.dirname(output_file), exist_ok=True)
with open(output_file, "w", newline="") as f:
    writer = csv.writer(f, delimiter=",", lineterminator="\n")
    writer.writerow(columns)
    # glob returns files in arbitrary order; sorting (lexicographically by
    # path) makes the merged file reproducible between runs.
    csv_files = sorted(glob.glob("output/features_*.csv"))
    for file in csv_files:
        # newline="" lets the csv module handle line endings itself.
        with open(file, mode='r', newline='') as csv_file:
            for row in csv.DictReader(csv_file):
                writer.writerow([row[column] for column in columns])