# This Notebook fetches the Spotify Attributes of a given Song and updates our existing csv

In [None]:
import pandas as pd
from dotenv import load_dotenv
import os
import requests
import base64
import json
import time

load_dotenv()

client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")

df = pd.read_csv('cleaned_gb_17-24.csv')

## 1.) Access token from Spotify Api 

In [None]:
# This request was written with the help of an LLM
auth_string = f"{client_id}:{client_secret}"
auth_bytes = auth_string.encode("utf-8")
auth_base64 = base64.b64encode(auth_bytes).decode("utf-8")

url = "https://accounts.spotify.com/api/token"
headers = {
    "Authorization": f"Basic {auth_base64}",
    "Content-Type": "application/x-www-form-urlencoded"
}
data = {"grant_type": "client_credentials"}

response = requests.post(url, headers=headers, data=data)

if response.status_code == 200:
    token_info = response.json()
    access_token = token_info["access_token"]
    print(f"Access Token: {access_token}")
else:
    print("Failed to get token:", response.status_code, response.json())


Access Token: BQCrOjkxvfeH4lZE7XIo9nbb9bfWUtXEM3jhcPCy4kTwqRuHKrncdEqocYViEvYU0oOw-OypXA0WE-yilEUl2-Y66eOtOzY6Ae31O_LuYE7357OlpWGOUca8jQ8kAx-WgQcIt5unshI


In [6]:
track_ids = df['uri'].apply(lambda x: x.split(':')[-1]).tolist()

## 2.) API Request for JSON Object of Attributes 

In [None]:
# This request was written with the help of an LLM
chunk_size = 50
chunks = [track_ids[i:i + chunk_size] for i in range(0, len(track_ids), chunk_size)]

all_track_data = []

headers = {
    "Authorization": f"Bearer {access_token}"
}

def handle_rate_limit(response):
    if response.status_code == 429:
        retry_after = int(response.headers.get("Retry-After", 5))
        print(f"Rate limit hit! Waiting {retry_after} seconds...")
        time.sleep(retry_after)

for i, chunk in enumerate(chunks):
    url = f"https://api.spotify.com/v1/tracks?ids={','.join(chunk)}"
    print(f"Fetching chunk {i+1}/{len(chunks)}...")

    response = requests.get(url, headers=headers)

    handle_rate_limit(response)

    if response.status_code == 200:
        track_data = response.json()
        all_track_data.extend(track_data['tracks'])
        print(f" Fetched {len(track_data['tracks'])} tracks.")
    else:
        print(f" Error: {response.status_code}, {response.text}")
        break

print(f" Total tracks fetched: {len(all_track_data)}")


Fetching chunk 1/197...
 Fetched 50 tracks.
Fetching chunk 2/197...
 Fetched 50 tracks.
Fetching chunk 3/197...
 Fetched 50 tracks.
Fetching chunk 4/197...
 Fetched 50 tracks.
Fetching chunk 5/197...
 Fetched 50 tracks.
Fetching chunk 6/197...
 Fetched 50 tracks.
Fetching chunk 7/197...
 Fetched 50 tracks.
Fetching chunk 8/197...
 Fetched 50 tracks.
Fetching chunk 9/197...
 Fetched 50 tracks.
Fetching chunk 10/197...
 Fetched 50 tracks.
Fetching chunk 11/197...
 Fetched 50 tracks.
Fetching chunk 12/197...
 Fetched 50 tracks.
Fetching chunk 13/197...
 Fetched 50 tracks.
Fetching chunk 14/197...
 Fetched 50 tracks.
Fetching chunk 15/197...
 Fetched 50 tracks.
Fetching chunk 16/197...
 Fetched 50 tracks.
Fetching chunk 17/197...
 Fetched 50 tracks.
Fetching chunk 18/197...
 Fetched 50 tracks.
Fetching chunk 19/197...
 Fetched 50 tracks.
Fetching chunk 20/197...
 Fetched 50 tracks.
Fetching chunk 21/197...
 Fetched 50 tracks.
Fetching chunk 22/197...
 Fetched 50 tracks.
Fetching chunk 23/1

## 3.) Throw away some for us useless columns and save csv 

In [None]:
track_data = pd.json_normalize(all_track_data)
selected_columns = [
    'duration_ms',
    'explicit',
    'is_local',
    'popularity',
    'track_number',
    'album.release_date',
    'album.total_tracks',
    'album.name', 
    'uri'
]
filtered_df = track_data[selected_columns].copy()
filtered_df.rename(columns={
    'album.release_date': 'release_date',
    'album.total_tracks': 'album_total_tracks',
    'album.name': 'album_name',
    'uri': 'spotify_uri' 
}, inplace=True)


In [None]:

merged_df = pd.concat([df, filtered_df], axis=1)

merged_df.to_csv('daily_usa_19-24_with_spotify.csv', index=False)