In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram
import matplotlib.pyplot as plt


# Load the track table and discard the identifier/URL columns that are not
# used by the rest of the notebook.
# NOTE(review): the recorded output of this cell shows a warning pointing at the
# read_csv call -- presumably pandas' mixed-dtype warning for chunked parsing.
# low_memory=False makes pandas infer each column's dtype from the whole file
# in one pass; confirm the warning is gone after re-running.
spotify = (
    pd.read_csv('genres_v2.csv', low_memory=False)
    .drop(columns=['analysis_url', 'track_href', 'uri', 'id', 'title'])
)
spotify.head()



  spotify = pd.read_csv('genres_v2.csv')


Unnamed: 0.1,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,duration_ms,time_signature,genre,song_name,Unnamed: 0
0,0.831,0.814,2,-7.364,1,0.42,0.0598,0.0134,0.0556,0.389,156.985,audio_features,124539,4,Dark Trap,Mercury: Retrograde,
1,0.719,0.493,8,-7.23,1,0.0794,0.401,0.0,0.118,0.124,115.08,audio_features,224427,4,Dark Trap,Pathology,
2,0.85,0.893,5,-4.783,1,0.0623,0.0138,4e-06,0.372,0.0391,218.05,audio_features,98821,4,Dark Trap,Symbiote,
3,0.476,0.781,0,-4.71,1,0.103,0.0237,0.0,0.114,0.175,186.948,audio_features,123661,3,Dark Trap,ProductOfDrugs (Prod. The Virus and Antidote),
4,0.798,0.624,2,-7.668,1,0.293,0.217,0.0,0.166,0.591,147.988,audio_features,123298,4,Dark Trap,Venom,


In [2]:


from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


# Columns left out of the feature matrix before standardization.
NON_FEATURE_COLUMNS = ['song_name', 'type', 'Unnamed: 0', 'time_signature', 'duration_ms', 'mode', 'genre']

# Standardizing the features.
# The fitted scaler is kept under its own name: transforming a song later needs
# the scaler object itself, not just the already-scaled array.
# 'cluster' is dropped defensively so that re-running this cell after the
# cluster labels have been attached to `spotify` does not feed them back in as
# a feature.
spotify_features = (
    spotify
    .drop(columns=NON_FEATURE_COLUMNS)
    .drop(columns=['cluster'], errors='ignore')
)
standard_scaler = StandardScaler()
spotify_scaled = standard_scaler.fit_transform(spotify_features)

# Performing PCA
pca = PCA(n_components=0.95)  # retain 95% of the variance
principalComponents = pca.fit_transform(spotify_scaled)

# Creating a DataFrame with principal components
spotify_pca = pd.DataFrame(data=principalComponents)

# Hierarchical clustering of the PCA-projected songs into 15 groups;
# agg_clusters holds one label per row of spotify_pca.
agg_cluster_model = AgglomerativeClustering(n_clusters=15)
agg_clusters = agg_cluster_model.fit_predict(spotify_pca)


In [30]:
# Attach the cluster label computed for each row to the song table, so later
# cells can filter songs by the 'cluster' column.
spotify['cluster'] = agg_clusters

In [40]:
def preprocess_song(song_features, standard_scaler, pca):
    """Standardize song features and project them with PCA.

    Parameters
    ----------
    song_features : sequence, pandas.Series, numpy.ndarray or pandas.DataFrame
        Either the feature vector of a single song (1-D) or a table with one
        row per song (2-D). A 1-D input is treated as one song.
    standard_scaler : object with a ``transform`` method
        The fitted scaler that was applied to the original dataset.
    pca : object with a ``transform`` method
        The fitted PCA that was applied to the standardized dataset.

    Returns
    -------
    The result of ``pca.transform`` on the standardized features, one row per
    input song.

    Raises
    ------
    ValueError
        If ``song_features`` is neither 1-D nor 2-D.
    """
    if isinstance(song_features, pd.DataFrame):
        # Pass frames through untouched so column names reach the scaler.
        feature_matrix = song_features
    else:
        feature_matrix = np.asarray(song_features, dtype=float)
        if feature_matrix.ndim == 1:
            # A single song: promote to a one-row matrix.
            feature_matrix = feature_matrix.reshape(1, -1)
        elif feature_matrix.ndim != 2:
            raise ValueError(
                "song_features must be 1-D (one song) or 2-D (one row per song), "
                "got {} dimension(s).".format(feature_matrix.ndim)
            )

    # Apply standardization and PCA transformations used in the original dataset
    standardized_features = standard_scaler.transform(feature_matrix)
    return pca.transform(standardized_features)

def find_cluster(song_features, cluster_model, reference_features=None, reference_labels=None):
    """Return the cluster label for the first row of ``song_features``.

    If ``cluster_model`` exposes ``predict`` it is used directly. Some
    clustering estimators (scikit-learn's AgglomerativeClustering among them)
    cannot label unseen points and have no ``predict``; for those, pass the
    already-clustered points and their labels and the song is assigned to the
    cluster whose centroid is nearest in Euclidean distance. This is an
    approximation and need not match what the clustering algorithm itself would
    have produced.

    Parameters
    ----------
    song_features : array-like
        Preprocessed features; a single vector or a 2-D array whose first row
        is the song of interest.
    cluster_model : object
        The trained clustering model.
    reference_features : array-like, optional
        Points (one row each) in the same space as ``song_features`` that have
        already been clustered. Only used when ``cluster_model`` has no
        ``predict``.
    reference_labels : array-like, optional
        Cluster label of each row of ``reference_features``.

    Returns
    -------
    The cluster label of the song.

    Raises
    ------
    AttributeError
        If the model has no ``predict`` and no reference data was supplied.
    ValueError
        If the reference data is empty or its features and labels differ in
        length.
    """
    predictor = getattr(cluster_model, 'predict', None)
    if callable(predictor):
        return predictor(song_features)[0]

    if reference_features is None or reference_labels is None:
        raise AttributeError(
            "{} has no 'predict' method; supply reference_features and "
            "reference_labels to assign the song to the nearest cluster "
            "centroid.".format(type(cluster_model).__name__)
        )

    points = np.asarray(reference_features, dtype=float)
    labels = np.asarray(reference_labels)
    if labels.size == 0 or points.shape[0] != labels.shape[0]:
        raise ValueError(
            "reference_features and reference_labels must be non-empty and have "
            "the same number of rows."
        )

    # Mirror the predict() branch: only the first row is labelled.
    query = np.atleast_2d(np.asarray(song_features, dtype=float))[0]

    unique_labels = np.unique(labels)
    centroids = np.vstack([points[labels == label].mean(axis=0) for label in unique_labels])
    nearest = np.argmin(np.linalg.norm(centroids - query, axis=1))
    return unique_labels[nearest]

def recommend_similar_songs(input_song_name, spotify_df, standard_scaler, pca, cluster_model, top_n=5):
    """Recommend the songs closest to ``input_song_name`` within its cluster.

    The song's cluster is read from the ``'cluster'`` column of ``spotify_df``.
    Every song in that cluster is standardized and PCA-projected in one batch,
    and candidates are ranked by Euclidean distance to the input song in that
    projected space (smaller means more similar).

    Parameters
    ----------
    input_song_name : str
        Name to look up in ``spotify_df['song_name']``. If several rows share
        the name, the first one is used as the query.
    spotify_df : pandas.DataFrame
        Song table with a ``'song_name'`` column, a ``'cluster'`` column and the
        feature columns the scaler was fitted on.
    standard_scaler : object with a ``transform`` method
        The fitted scaler. If it exposes ``feature_names_in_``, exactly those
        columns are used as features; otherwise all numeric columns except
        ``'cluster'`` are used.
    pca : object with a ``transform`` method
        The fitted PCA applied after standardization.
    cluster_model : object
        Accepted for interface compatibility and not consulted: the input song
        is a row of ``spotify_df``, so its label is already in ``'cluster'``.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    list of str
        Up to ``top_n`` distinct song names, nearest first. Rows carrying the
        input song's own name and rows without a song name are excluded.
    str
        A message if the song is not in the dataset or if processing failed.
    """
    if input_song_name not in spotify_df['song_name'].values:
        return "Song '{}' is not in the dataset.".format(input_song_name)

    try:
        # Use exactly the columns the scaler was fitted on when it recorded them;
        # text columns such as genre/type must never reach the scaler.
        fitted_names = getattr(standard_scaler, 'feature_names_in_', None)
        if fitted_names is not None:
            feature_columns = list(fitted_names)
        else:
            feature_columns = [
                column
                for column in spotify_df.select_dtypes(include='number').columns
                if column != 'cluster'
            ]

        # Cluster of the input song (first matching row).
        name_matches = spotify_df['song_name'] == input_song_name
        input_song_cluster = spotify_df.loc[name_matches, 'cluster'].iloc[0]

        # Filter songs from the same cluster
        same_cluster_songs = spotify_df[spotify_df['cluster'] == input_song_cluster].copy()

        # Standardize and project the whole cluster in one call instead of once per row.
        projected = np.asarray(
            pca.transform(standard_scaler.transform(same_cluster_songs[feature_columns])),
            dtype=float,
        )

        # The query row belongs to this cluster by construction; row order is
        # preserved by the filter, so the first name match here is the query.
        is_query = (same_cluster_songs['song_name'] == input_song_name).to_numpy()
        query_vector = projected[np.flatnonzero(is_query)[0]]

        # 'similarity' holds a distance: lower values are closer to the query.
        same_cluster_songs['similarity'] = np.linalg.norm(projected - query_vector, axis=1)

        # Do not recommend the query itself or rows that have no name to show.
        has_name = same_cluster_songs['song_name'].notna().to_numpy()
        candidates = same_cluster_songs[~is_query & has_name]

        # Nearest first; a title that appears on several rows is listed once.
        ranked = (
            candidates
            .sort_values(by='similarity', kind='stable')
            .drop_duplicates(subset='song_name')
        )
        return ranked['song_name'].head(top_n).tolist()

    except Exception as e:
        # Callers distinguish success (list) from failure (message string),
        # so failures are reported as a message rather than raised.
        return "An error occurred: {}".format(e)


In [None]:
# Example song name
input_song_name = "Mock Song"

# Objects handed to recommend_similar_songs:
# - standard_scaler: a fitted StandardScaler. It must be the scaler object
#   (something with .transform), not the already-scaled array `spotify_scaled`.
# - pca: The PCA object used for dimensionality reduction.
# - agg_cluster_model: The trained AgglomerativeClustering model.
#
# Fit the scaler on the same feature columns that were standardized before PCA.
# StandardScaler fitting is deterministic, so this reproduces that
# transformation. 'cluster' is removed because it is a label, not a feature.
example_feature_frame = (
    spotify
    .drop(columns=['song_name', 'type', 'Unnamed: 0', 'time_signature', 'duration_ms', 'mode', 'genre'])
    .drop(columns=['cluster'], errors='ignore')
)
standard_scaler = StandardScaler().fit(example_feature_frame)

try:
    # Call the recommend_similar_songs function
    similar_songs = recommend_similar_songs(
        input_song_name,
        spotify,
        standard_scaler,
        pca,
        agg_cluster_model
    )

    # Check if the function returned a list (indicating successful recommendation)
    if isinstance(similar_songs, list):
        print(f"Songs similar to '{input_song_name}':")
        for song in similar_songs:
            print(song)
    else:
        # If not a list, it should be an error or informational message
        print(similar_songs)

except Exception as e:
    print(f"An error occurred while recommending songs: {e}")
