# Create a mini-pipeline for new songs not in my original dataset (provide a Spotify URL)

In [2]:
# Load API secrets from .env file
!pip install python-dotenv



In [None]:
from dotenv import load_dotenv
import os
import spotipy
from spotipy.oauth2 import SpotifyOAuth

# App credentials are managed at https://developer.spotify.com/dashboard
# Pull the variables defined in the local .env file into the environment.
load_dotenv()

# Build the OAuth manager first, then hand it to the client.
# The scopes requested are all read-only: library, top items, recent plays, playlists.
auth_manager = SpotifyOAuth(
    scope='user-library-read user-top-read user-read-recently-played playlist-read-private playlist-read-collaborative'
)
sp = spotipy.Spotify(auth_manager=auth_manager)

## Use a Taylor Swift song I've never saved as an example

### General Features - track_duration, track_popularity, track_release_date, track_explicit, artist_popularity, artist_follower_count, genre_list

In [25]:
# Spotify share URL of the track to run through the pipeline.
# Example: Taylor Swift - august (going to columbia in august)
track_url = 'https://open.spotify.com/track/3hUxzQpSfdDqwM3ZTFQY0K?si=f293d678602c4803'

In [26]:
# The track ID is whatever follows the final "/" in the URL, minus any "?..." suffix
track_id = track_url.rsplit("/", 1)[-1].partition("?")[0]

# Look the track up by ID
track_info = sp.track(track_id)

# Look up the full artist record for the first artist listed on the track
primary_artist = track_info['artists'][0]
artist_id = primary_artist['id']
artist = sp.artist(artist_id)

In [27]:
from datetime import datetime

# Extract general features from the track and artist objects fetched earlier
track_name = track_info['name']
artist_name = artist['name']
track_duration = int(track_info['duration_ms'] / 1000) # in seconds (fraction truncated)
track_popularity = track_info['popularity']
track_explicit = int(track_info['explicit'])  # 1 if True, 0 if False
artist_popularity = artist['popularity']
artist_follower_count = artist['followers']['total']
genre_list = artist['genres']  # this is a list of genres

# Track release date -> integer YYYYMM.
# Spotify reports album release dates at "year", "month" or "day" precision
# (e.g. "1981", "1981-12" or "1981-12-15"). A fixed "%Y-%m-%d" format raises
# ValueError on the shorter forms, so pad any missing month/day with "01" first.
# NOTE(review): defaulting a missing month to January is an assumption - confirm it
# matches how such tracks were encoded in the training data.
release_date_str = track_info['album']['release_date']
date_parts = release_date_str.split("-")
date_parts += ["01"] * (3 - len(date_parts))
release_date_obj = datetime.strptime("-".join(date_parts), "%Y-%m-%d")
track_release_date = int(release_date_obj.strftime("%Y%m"))

# Print the extracted features for a quick visual check
print(f"track_name: {track_name}")
print(f"artist_name: {artist_name}")
print(f"track_duration: {track_duration}")
print(f"track_popularity: {track_popularity}")
print(f"track_release_date: {track_release_date}")
print(f"track_explicit: {track_explicit}")
print(f"artist_popularity: {artist_popularity}")
print(f"artist_follower_count: {artist_follower_count}")
print(f"genre_list: {genre_list}")

track_name: august
artist_name: Taylor Swift
track_duration: 261
track_popularity: 87
track_release_date: 202007
track_explicit: 0
artist_popularity: 98
artist_follower_count: 140290461
genre_list: []


### Nick Specific Features - artist_ranking, artist_freq, genre_* (check from CSVs)

In [28]:
import pandas as pd

# Read both lookup tables, using the first CSV column as the index.
# presumably each file has a single value column, so squeeze("columns") yields a
# Series keyed by artist name - verify against the CSVs.
artist_ranking = pd.read_csv("nick_artist_ranking.csv", index_col=0).squeeze("columns")
artist_freq = pd.read_csv("nick_artist_freq.csv", index_col=0).squeeze("columns")

artist_name = artist['name']

# .get falls back to None when the artist name is not in the index
ranking = artist_ranking.get(artist_name)
freq = artist_freq.get(artist_name)

# Report what was found (or not) for each table
print("Artist not found in ranking data." if ranking is None else f"Artist ranking: {ranking}")
print("Artist not found in frequency data." if freq is None else f"Artist frequency: {freq}")


Artist ranking: 139
Artist frequency: 3


In [None]:
# Read the genre vocabulary from file, one entry per line, trimming surrounding whitespace
with open("nick_artist_genres.txt", "r") as f:
    nick_artist_genres = [line.strip() for line in f]

# Genres Spotify lists for the artist
genre_list = artist['genres']

print(genre_list)

# One 0/1 flag per vocabulary genre: 1 when the artist carries that genre.
# Spaces in the genre name become underscores in the "genre_*" key.
genre_flags = {
    f"genre_{name.replace(' ', '_')}": int(name in genre_list)
    for name in nick_artist_genres
}

print(genre_flags)


[]
{'genre_metalcore': 0, 'genre_post-hardcore': 0, 'genre_emo': 0, 'genre_pop_punk': 0, 'genre_screamo': 0, 'genre_metal': 0, 'genre_classical': 0, 'genre_classical_piano': 0, 'genre_alternative_metal': 0, 'genre_rock': 0, 'genre_chamber_music': 0, 'genre_deathcore': 0, 'genre_post-grunge': 0, 'genre_rap_metal': 0, 'genre_djent': 0, 'genre_nu_metal': 0, 'genre_punk': 0, 'genre_rap': 0, 'genre_christian_rock': 0, 'genre_edm': 0, 'genre_hard_rock': 0, 'genre_progressive_metal': 0}


In [30]:
# Combine all features into one dictionary.
# ranking / freq are None when the artist is absent from the lookup CSVs. A None
# value would turn the column into an object-dtype column in the DataFrame, so
# store NaN instead to keep both columns numeric for the model.
# NOTE(review): confirm NaN is how unseen artists should be presented to the model.
row_data = {
    'track_duration': track_duration,
    'track_popularity': track_popularity,
    'track_release_date': track_release_date,
    'track_explicit': track_explicit,
    'artist_popularity': artist_popularity,
    'artist_follower_count': artist_follower_count,
    'artist_ranking': float("nan") if ranking is None else ranking,
    'artist_freq': float("nan") if freq is None else freq,
    **genre_flags  # unpack all genre feature flags
}

# Convert to single-row DataFrame (one column per feature, in the order above)
df_row = pd.DataFrame([row_data])

# Display the resulting row
print(df_row.to_string(index=False))

 track_duration  track_popularity  track_release_date  track_explicit  artist_popularity  artist_follower_count  artist_ranking  artist_freq  genre_metalcore  genre_post-hardcore  genre_emo  genre_pop_punk  genre_screamo  genre_metal  genre_classical  genre_classical_piano  genre_alternative_metal  genre_rock  genre_chamber_music  genre_deathcore  genre_post-grunge  genre_rap_metal  genre_djent  genre_nu_metal  genre_punk  genre_rap  genre_christian_rock  genre_edm  genre_hard_rock  genre_progressive_metal
            261                87              202007               0                 98              140290461             139            3                0                    0          0               0              0            0                0                      0                        0           0                    0                0                  0                0            0               0           0          0                     0          0                0  

In [31]:
# Data is now ready for the model

***

# Generate a prediction from the model on the new data

In [32]:
import joblib

# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
model = joblib.load("lightgbm_model.pkl")

# Class-1 probability is taken from the second predict_proba column; it is kept
# for inspection (probability[0]) but not printed.
probability = model.predict_proba(df_row)[:, 1]

# Hard class label for the single row
prediction = model.predict(df_row)

print("Predicted class:", prediction[0])

Predicted class: 0


## Sorry Taylor, the model says your song wouldn't be in my favorites and I agree