In [6]:
import pandas as pd
# Load the track table from CSV and preview its first five rows.
tracks_df = pd.read_csv(filepath_or_buffer="../spotify_tracks_50.csv")
tracks_df.head(n=5)

Unnamed: 0,track_uri,album_name,album_uri,artist_name,artist_uri,duration_ms,pos,track_name,inside_playlists
0,spotify:track:4pLwZjInHj3SimIyN9SnOz,Dangerous Woman,spotify:album:4lVR2fg3DAUQpGVJ6DciHW,Ariana Grande,spotify:artist:66CXWjxzNUsdJxJ2JdwvnR,226160,0,Side To Side,[584466 290613 339107 ... 932309 757216 867665]
1,spotify:track:37f4ITSlgPX81ad2EvmVQr,Wildfire,spotify:album:0mFDIOqypzHp6Xd0el1hoT,Rachel Platten,spotify:artist:3QLIkT4rD2FMusaqmkepbq,204013,1,Fight Song,[584466 290613 540271 ... 535890 217688 26462]
2,spotify:track:6i0V12jOa3mr6uu4WYhUBr,Heathens,spotify:album:3J8W9AOjQhnBLCX33m3atT,Twenty One Pilots,spotify:artist:3YQKmKGau1PzlVlkL1iodx,195920,2,Heathens,[584466 290613 339107 ... 349806 168239 932309]
3,spotify:track:2DpCdPMg1BADE4HDnxt3Rd,"Sit Still, Look Pretty",spotify:album:2cE2eOy7alOZHpuelJEV8Q,Daya,spotify:artist:6Dd3NScHWwnW6obMFbl1BH,202226,3,"Sit Still, Look Pretty",[584466 414677 809633 ... 6555 349244 395752]
4,spotify:track:6Knv6wdA0luoMUuuoYi2i1,MY HOUSE,spotify:album:5lkNnHVlnCCCV304t89wOH,Flo Rida,spotify:artist:0jnsk9HBra6NMjO2oANoPY,192190,4,My House,[584466 290613 968716 ... 257996 349976 6463]


In [None]:
import os
import pandas as pd
from dotenv import load_dotenv
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Populate the process environment via python-dotenv, then read the two
# Spotify credentials from it (each is None when the variable is unset).
load_dotenv()
SPOTIPY_CLIENT_ID = os.environ.get("SPOTIPY_CLIENT_ID")
SPOTIPY_CLIENT_SECRET = os.environ.get("SPOTIPY_CLIENT_SECRET")

# Build the API client with a client-credentials auth manager.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=SPOTIPY_CLIENT_ID,
        client_secret=SPOTIPY_CLIENT_SECRET,
    )
)

# Load the track table whose URIs are looked up below.
tracks_df = pd.read_csv(filepath_or_buffer="../spotify_tracks_50.csv")

# Helper: Batch iterator
def batch(iterable, n=1):
    """Yield successive chunks of at most ``n`` items from a sequence.

    Args:
        iterable: A sized, sliceable sequence (e.g. a list); it must support
            ``len()`` and slice indexing.
        n: Maximum chunk size; must be a positive integer.

    Yields:
        Consecutive slices of ``iterable``. The final slice may be shorter
        than ``n``. Nothing is yielded for an empty sequence.

    Raises:
        ValueError: If ``n`` is less than 1. Because this is a generator, the
            error surfaces when iteration starts.
    """
    # A negative step makes range() empty, which would silently drop every
    # item; fail loudly instead (n == 0 already raised ValueError in range()).
    if n < 1:
        raise ValueError(f"batch size n must be >= 1, got {n!r}")
    total = len(iterable)
    for start in range(0, total, n):
        # Slicing clamps at the end of the sequence, so no explicit min() is needed.
        yield iterable[start:start + n]

# Pull the two URI columns out of the track table as plain Python lists.
track_uris = tracks_df["track_uri"].to_list()
album_uris = tracks_df["album_uri"].to_list()

# Fetch track objects in chunks of 50 URIs per sp.tracks call
# (50 is the per-request cap of Spotify's tracks endpoint).
all_track_details = []
for track_batch in batch(track_uris, n=50):
    response = sp.tracks(track_batch)
    all_track_details += response["tracks"]

# For album details, we only need to request each distinct album URI once.
# dict.fromkeys de-duplicates while keeping first-seen order, so the batches
# are the same on every run (set() ordering of strings varies per process).
# Non-string entries (e.g. NaN from an empty CSV cell) are dropped because
# they cannot be sent to the API as album IDs.
unique_album_uris = [
    uri for uri in dict.fromkeys(album_uris) if isinstance(uri, str)
]
album_details_dict = {}
# Spotify API supports up to 20 albums per call.
for album_batch in batch(unique_album_uris, n=20):
    response = sp.albums(album_batch)
    for album in response["albums"]:
        # The API returns null (None) in place of any album it cannot find;
        # skip those rather than crash on album["uri"].
        if album is None:
            continue
        # Use the album's URI as the key
        album_details_dict[album["uri"]] = album

# Combine track and album information into metadata: one row per entry of
# all_track_details, in the same order.
track_metadata = []
for track in all_track_details:
    if track is None:
        # The API returns null (None) for a track it cannot find. Keep an
        # all-None placeholder row so row positions still match the order in
        # which the tracks were requested.
        track_metadata.append({
            "track_name": None,
            "track_popularity": None,
            "album_name": None,
            "album_release_date": None,
        })
        continue

    album_uri = track["album"]["uri"]
    # album_details_dict is keyed by the album URIs that were requested; the
    # track object can reference a different URI. In that case fall back to
    # the simplified album embedded in the track object, which also carries
    # "name" and "release_date", instead of emitting None for both fields.
    album_info = album_details_dict.get(album_uri) or track["album"]
    track_metadata.append({
        "track_name": track["name"],
        "track_popularity": track["popularity"],
        "album_name": album_info.get("name", None),
        "album_release_date": album_info.get("release_date", None),
    })

Unnamed: 0,track_name,track_popularity,album_name,album_release_date
0,Side To Side,1,Dangerous Woman,2016-05-20
1,Fight Song,71,Wildfire,2016-01-01
2,Heathens,76,Heathens,2016-06-16
3,"Sit Still, Look Pretty",10,"Sit Still, Look Pretty",2017-01-06
4,My House,71,My House,2015-04-07
5,Work from Home (feat. Ty Dolla $ign),73,7/27 (Deluxe),2016-05-27
6,Morocco,0,Mountains Beaches Cities,2013-09-10
7,Make Your Mind Up,0,Daybreaker,2015-10-02
8,Unstoppable,19,Inside Out,2010-01-01
9,Closer,82,Closer,2016-07-29


In [8]:
# Turn the per-track dictionaries into a table (one row per dictionary)
# and preview the first five rows.
track_metadata_df = pd.DataFrame(data=track_metadata)
track_metadata_df.head(n=5)

Unnamed: 0,track_name,track_popularity,album_name,album_release_date
0,Side To Side,1,Dangerous Woman,2016-05-20
1,Fight Song,71,Wildfire,2016-01-01
2,Heathens,76,Heathens,2016-06-16
3,"Sit Still, Look Pretty",10,"Sit Still, Look Pretty",2017-01-06
4,My House,71,My House,2015-04-07


In [12]:
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Assume track_metadata_df already has "track_popularity" and "album_release_date"

# 1. Standardize track popularity: fit the scaler on the single-column frame
#    and store the transformed values next to the raw ones.
scaler = StandardScaler()
track_metadata_df["track_popularity_scaled"] = scaler.fit_transform(
    X=track_metadata_df[["track_popularity"]]
)

# 2. Define a function to convert release dates to eras (e.g., 70s, 80s, etc.)
def release_date_to_era(release_date):
    """Map a release date to a decade label such as ``"90s"``.

    Args:
        release_date: A value ``pd.to_datetime`` can parse (e.g. ``"YYYY"``,
            ``"YYYY-MM"``, ``"YYYY-MM-DD"``), or a bare four-digit numeric
            year such as ``2016``.

    Returns:
        ``"70s"``, ``"80s"``, ``"90s"``, ``"00s"`` or ``"10s"`` for years
        1970-2019, ``"Other"`` for any other valid year, and ``None`` when
        the value is missing or cannot be interpreted as a date.
    """
    import numbers  # local import keeps this definition self-contained

    era_by_decade = {
        1970: "70s",
        1980: "80s",
        1990: "90s",
        2000: "00s",
        2010: "10s",
    }

    try:
        is_number = (
            isinstance(release_date, numbers.Real)
            and not isinstance(release_date, bool)
        )
        if is_number and 1000 <= release_date <= 9999 and release_date == int(release_date):
            # pd.to_datetime reads a bare number as nanoseconds since the
            # epoch, so the year 2016 would otherwise be labelled "70s".
            year = int(release_date)
        else:
            parsed = pd.to_datetime(release_date, errors="coerce")
            # Covers None (returned unchanged by to_datetime) and NaT
            # (produced by errors="coerce" for unparseable input).
            if pd.isnull(parsed):
                return None
            year = parsed.year
    except (TypeError, ValueError, OverflowError, AttributeError):
        # Best effort: input of an unsupported type or shape has no era.
        return None

    # Floor the year to its decade; decades outside the table are "Other".
    return era_by_decade.get(year // 10 * 10, "Other")

# Label every row with the era of its album's release date, then preview
# the first five rows.
track_metadata_df["album_era"] = track_metadata_df["album_release_date"].apply(
    func=release_date_to_era
)
track_metadata_df.head(n=5)

Unnamed: 0,track_name,track_popularity,album_name,album_release_date,track_popularity_scaled,album_era
0,Side To Side,1,Dangerous Woman,2016-05-20,-1.176268,10s
1,Fight Song,71,Wildfire,2016-01-01,1.027661,10s
2,Heathens,76,Heathens,2016-06-16,1.185084,10s
3,"Sit Still, Look Pretty",10,"Sit Still, Look Pretty",2017-01-06,-0.892906,10s
4,My House,71,My House,2015-04-07,1.027661,10s
