In [12]:
import ast

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [4]:
# Load the tracks table from the local CSV export.
# Source given by the author (kaggle.com):
# https://www.kaggle.com/datasets/maharshipandya/-spotify-tracks-dataset
# NOTE(review): later cells read `name`, `artists`, `year` and `popularity`
# columns -- confirm the file at this path really comes from the URL above.
#
# The three columns the analysis does not use are dropped in the same
# expression, so main_df is bound once to the trimmed frame.
main_df = pd.read_csv(
    "/Users/lorenzograssi/Desktop/Projects/Spotify/data.csv"
).drop(columns=["id", "release_date", "liveness"])

In [13]:
# Audio features used to describe each track in the similarity computation
features = [
    'valence',
    'danceability',
    'energy',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'tempo',
    'loudness',
]

# Keep each row's index label as a column so a row can be traced back to
# its line in X_scaled later on.
main_df['original_index'] = main_df.index

# Standardise the feature columns; X_scaled rows follow main_df's row order.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(main_df[features])

# Fit a full PCA on the standardised features to derive per-feature weights.
pca = PCA().fit(X_scaled)
explained_variance_ratio = pca.explained_variance_ratio_
weights = explained_variance_ratio

# Loadings: one row per feature, one column per principal component.
loadings = pca.components_.T * np.sqrt(pca.explained_variance_)
df_loadings = pd.DataFrame(loadings, index=features)
df_abs_loadings = df_loadings.abs()

# Relevance of a feature = sum over components of
# |loading| * explained-variance ratio of that component.
relevance_scores = df_abs_loadings.mul(weights, axis="columns").sum(axis="columns")

# Rescale so the most relevant feature has weight 1.
weights_normalized = relevance_scores.div(relevance_scores.max())

# Same relevance scores, most relevant feature first
relevance_scores_sorted = relevance_scores.sort_values(ascending=False)

In [14]:
# weighted cosine similarity function
def weighted_cosine_similarity(vec1, vec2, weights):
    """Cosine similarity between two vectors after per-dimension weighting.

    Both vectors are multiplied element-wise by ``weights`` before the usual
    cosine formula is applied, so each weight enters the dot product squared.

    Parameters
    ----------
    vec1, vec2 : array-like of float
        Vectors of equal length.
    weights : array-like of float
        One weight per dimension, same length as the vectors.

    Returns
    -------
    float
        The cosine of the angle between the weighted vectors, or 0.0 when
        either weighted vector has zero norm (the angle is undefined there).
    """
    weights = np.asarray(weights, dtype=float)
    weighted_vec1 = np.asarray(vec1, dtype=float) * weights
    weighted_vec2 = np.asarray(vec2, dtype=float) * weights

    denominator = np.linalg.norm(weighted_vec1) * np.linalg.norm(weighted_vec2)
    # Avoid 0/0 -> NaN (and the RuntimeWarning): NaN scores cannot be ordered
    # reliably by the caller's sort.
    if denominator == 0:
        return 0.0

    return np.dot(weighted_vec1, weighted_vec2) / denominator

In [15]:
# Recommendation function
def recommend_by_title(title, n_recommendations=20, alpha_pop=0.05, alpha_year=0.01):
    """Recommend the tracks most similar to the first track named ``title``.

    Uses the module-level ``main_df``, ``X_scaled`` and ``weights_normalized``.
    Every track is scored as::

        weighted cosine similarity to the target
          * (1 + alpha_pop  * min-max normalised popularity)
          * (1 + alpha_year * exp(-|year - target year| / 5))

    The similarity is the vectorised equivalent of calling
    ``weighted_cosine_similarity`` once per row.

    Parameters
    ----------
    title : str
        Track name, matched case-insensitively against ``main_df['name']``.
        If several rows share the name, the first one is the target.
    n_recommendations : int, default 20
        Maximum number of rows returned.
    alpha_pop : float, default 0.05
        Strength of the popularity boost.
    alpha_year : float, default 0.01
        Strength of the release-year proximity boost.

    Returns
    -------
    pandas.DataFrame or str
        Columns ``name``, ``artists``, ``year``, ``popularity`` for the
        best-scoring tracks (target row excluded), highest score first;
        or the message ``"Song '<title>' not found."`` when nothing matches.
    """
    matches = main_df[main_df['name'].str.lower() == title.lower()]
    if matches.empty:
        return f"Song '{title}' not found."

    target = matches.iloc[0]
    target_row = int(target['original_index'])
    target_year = target['year']

    # `original_index` maps each current main_df row to its row in X_scaled,
    # so index labels are never mistaken for positions.
    row_ids = main_df['original_index'].to_numpy()
    feature_weights = weights_normalized.values
    weighted = X_scaled[row_ids] * feature_weights
    weighted_target = X_scaled[target_row] * feature_weights

    # Weighted cosine similarity of every row against the target in one pass.
    dots = weighted @ weighted_target
    norms = np.linalg.norm(weighted, axis=1) * np.linalg.norm(weighted_target)
    # Rows with a zero norm get similarity 0 instead of NaN.
    sims = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)

    # Normalized popularity (0-1) and weighted by alpha_pop.
    # min/max are computed once instead of once per row.
    popularity = main_df['popularity'].to_numpy(dtype=float)
    pop_min = main_df['popularity'].min()
    pop_range = main_df['popularity'].max() - pop_min
    if pop_range > 0:
        pop_norm = (popularity - pop_min) / pop_range
    else:
        # Constant popularity column: no track gets a boost.
        pop_norm = np.zeros_like(popularity)
    pop_score = 1 + alpha_pop * pop_norm

    # Year: the closer to the target, the higher the score (exponential decay)
    year_diff = np.abs(main_df['year'].to_numpy(dtype=float) - target_year)
    year_score = 1 + alpha_year * np.exp(-year_diff / 5)

    final_score = sims * pop_score * year_score

    # Descending by score; the stable sort keeps ties in row order and
    # argsort places NaN scores last.
    order = np.argsort(-final_score, kind='stable')

    # Exclude the input song itself
    order = order[row_ids[order] != target_row][:n_recommendations]

    return main_df.iloc[order][['name', 'artists', 'year', 'popularity']]


In [19]:
# User Interaction
#
# Flow: ask for an artist, list that artist's track names, let the user pick
# one by number, then print the playlist returned by recommend_by_title.

def _format_artists(raw_artists):
    """Turn an ``artists`` cell into a comma-separated display string.

    String cells are parsed with ``ast.literal_eval`` (never ``eval``: the
    text comes from a data file and must not be executed). Text that is not
    a Python literal is returned unchanged; non-string cells are passed
    through ``str``.
    """
    if not isinstance(raw_artists, str):
        return str(raw_artists)
    try:
        parsed = ast.literal_eval(raw_artists)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return raw_artists
    if isinstance(parsed, (list, tuple, set)):
        return ', '.join(str(artist) for artist in parsed)
    return str(parsed)


# Ask user to input an artist name
input_artist = input("Enter the artist's name: ").strip().lower()

# Filter songs by artist name
main_df['artist_str'] = main_df['artists'].astype(str).str.lower()
# regex=False: the typed text is matched literally, so names containing
# characters such as "(", "+" or "*" neither fail nor match unexpectedly.
artist_songs = main_df[main_df['artist_str'].str.contains(input_artist, regex=False, na=False)]

if not input_artist:
    # An empty search string is contained in every row; refuse it instead
    # of listing the whole dataset.
    print("Please enter an artist name.")
elif artist_songs.empty:
    # If no songs found, notify the user
    print(f"No songs found for artist '{input_artist}'.")
else:
    # Show found songs
    print(f"\nSongs found for artist '{input_artist}':\n")

    songs_list = sorted(artist_songs['name'].dropna().unique())

    for i, title in enumerate(songs_list):
        print(f"{i}: {title}")

    # Ask user to choose a song
    raw_choice = input("\nChoose the index of the song: ").strip()
    try:
        choice = int(raw_choice)
    except ValueError:
        choice = None

    # Reject non-numeric, negative and out-of-range answers explicitly;
    # a negative number would otherwise silently index from the end.
    if choice is None or not 0 <= choice < len(songs_list):
        print(f"Invalid choice '{raw_choice}'. Please enter one of the listed indices.")
    else:
        chosen_title = songs_list[choice]

        # Get the selected song row (taken from the chosen artist's rows)
        chosen_song_row = artist_songs[artist_songs['name'] == chosen_title].iloc[0]

        # Generate playlist recommendations
        # NOTE(review): recommend_by_title looks the track up by title only,
        # so when several artists share this title the seed may be another
        # artist's track.
        playlist = recommend_by_title(chosen_title)

        if isinstance(playlist, str):
            print(playlist)
        else:
            print(f"\n🎵 Recommended playlist based on: '{chosen_title}'\n")

            # Reset index to keep original index as column
            playlist = playlist.reset_index()

            # Print recommended songs (name, artists, year)
            for i, row in playlist.iterrows():
                name = str(row['name']).upper()  # str(): tolerate a missing name
                artists = _format_artists(row['artists'])
                year = row['year']
                print(f"{i + 1}) {name} by {artists} ({year})")


Enter the artist's name:  daft punk



Songs found for artist 'daft punk':

0: Adagio For TRON
1: Aerodynamic
2: Aerodynamic - Daft Punk Remix
3: Alive
4: Arena
5: Armory
6: Around the World
7: Around the World - Radio Edit [Radio Edit]
8: Around the World / Harder, Better, Faster, Stronger
9: Burnin'
10: Burnin' - Edit Version [Edit Version]
11: C.L.U.
12: Contact
13: Crescendolls
14: Da Funk
15: Daftendirekt
16: Derezzed - From "TRON: Legacy"/Score
17: Derezzed - Remixed by The Glitch Mob
18: Digital Love
19: Disc Wars
20: Doin' it Right (feat. Panda Bear)
21: End of Line
22: Face to Face
23: Fall
24: Flynn Lives
25: Fragments of Time (feat. Todd Edwards)
26: Fresh
27: Get Lucky (feat. Pharrell Williams & Nile Rodgers)
28: Get Lucky (feat. Pharrell Williams & Nile Rodgers) - Radio Edit
29: Giorgio by Moroder
30: Give Life Back to Music
31: Harder, Better, Faster, Stronger
32: High Fidelity
33: High Life
34: Human After All
35: I Feel It Coming
36: Indo Silver Club
37: Instant Crush (feat. Julian Casablancas)
38: Lose You


Choose the index of the song:  72



🎵 Recommended playlist based on: 'Veridis Quo'

1) GOLDEN LIGHT by STRFKR (2013)
2) MOTHERBOARD by Daft Punk (2013)
3) A GREAT DESIGN by Black Marble (2012)
4) RINZLER by Daft Punk (2010)
5) THE ONE WITH THE WURLITZER by American Football (1999)
6) DOCKING THE POD by Duster (1998)
7) SNAKE SONG by Isobel Campbell, Mark Lanegan (2010)
8) INTRO by The xx (2009)
9) MOON (AND IT WENT LIKE) by Kid Francescoli (2017)
10) SLEEPY HERBS by Ole Lukkoye (2012)
11) FORTUNE DAYS by The Glitch Mob (2010)
12) CHIRP by C418 (2013)
13) THE GAME HAS CHANGED by Daft Punk (2010)
14) PIMPF - 2006 REMASTER by Depeche Mode (1987)
15) VOYAGER by Daft Punk (2001)
16) PARIS by M|O|O|N (2011)
17) GIRL by salvia palth (2013)
18) THAT'S THE WAY LOVE GOES by Norman Brown (1994)
19) EQUINOXE, PT. 4 by Jean-Michel Jarre (1978)
20) IN DREAMS by Ben Howard (2014)
