In [6]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors

# Load the dataset.
# low_memory=False lets pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids the mixed-type DtypeWarning that
# pandas can emit for large CSVs.
genres_df = pd.read_csv("genres_v2.csv", low_memory=False)

# Select audio features for similarity
audio_features = [
    'danceability', 'energy', 'loudness', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
]

# Drop rows missing any selected feature, and rows without a song name:
# a nameless row cannot be looked up by name and would otherwise show up
# as NaN among the recommendations.
genres_df_cleaned = genres_df.dropna(subset=audio_features + ['song_name'])

# Remove duplicate songs based on 'song_name' (first occurrence wins).
# The explicit copy makes the cleaned frame independent of genres_df so the
# column assignment below does not raise SettingWithCopyWarning.
genres_df_cleaned = genres_df_cleaned.drop_duplicates(subset='song_name', keep='first').copy()

# Normalize the audio features to the [0, 1] range
scaler = MinMaxScaler()
genres_df_cleaned[audio_features] = scaler.fit_transform(genres_df_cleaned[audio_features])

# Dimensionality reduction
pca = PCA(n_components=0.95)  # Retain 95% variance
reduced_features = pca.fit_transform(genres_df_cleaned[audio_features])

# Train the NearestNeighbors model
knn_model = NearestNeighbors(metric='cosine', algorithm='auto')
knn_model.fit(reduced_features)

# Map each song name to its positional row index in genres_df_cleaned
# (and therefore in reduced_features). Positions, not index labels, are used
# because the frame's original index labels are kept after filtering.
song_name_to_idx = {name: idx for idx, name in enumerate(genres_df_cleaned['song_name'])}

# Define the recommendation function
def recommend_songs(song_name, top_n=5):
    """
    Recommend similar songs based on a given song's name.

    The song is looked up in ``song_name_to_idx``; its row of
    ``reduced_features`` is used to query ``knn_model``, and the names of the
    nearest neighbours (excluding the query song itself) are returned.

    Parameters:
        song_name (str): The name of the input song.
        top_n (int): Maximum number of similar songs to recommend.
            Must not be negative.

    Returns:
        numpy.ndarray or str: Array of recommended song names (at most
        ``top_n``; fewer if the fitted data does not contain enough other
        songs), or a message string if the song is not found.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if song_name not in song_name_to_idx:
        return f"Song '{song_name}' not found in the dataset."

    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Get the index of the song
    song_idx = song_name_to_idx[song_name]

    # Request one extra neighbour because the query song is normally returned
    # as its own nearest neighbour. Cap the request at the number of fitted
    # samples: kneighbors raises if asked for more neighbours than exist.
    n_neighbors = min(top_n + 1, len(reduced_features))
    _, indices = knn_model.kneighbors([reduced_features[song_idx]], n_neighbors=n_neighbors)

    # Exclude the input song from the recommendations
    recommended_indices = [i for i in indices[0] if i != song_idx]

    # The query song may be absent from the neighbours (e.g. when other rows
    # tie with it), so slice to top_n rather than assuming one was removed.
    similar_songs = genres_df_cleaned.iloc[recommended_indices[:top_n]]['song_name'].values
    return similar_songs

# Smoke-test the recommender on a single track
example_song = 'Mercury: Retrograde'  # swap in any song name present in the dataset
recommended_songs = recommend_songs(example_song, 5)

# Show the query alongside whatever recommend_songs returned
print(
    f"Recommendations for '{example_song}': {recommended_songs}"
)


Recommendations for 'Mercury: Retrograde': ['Kid Cudi (Remix)'
 'FRANCHISE (feat. Future, Young Thug & M.I.A.) - REMIX' 'Run Boy Run!'
 'EXORCIST' 'P-R-E-Y']


  genres_df = pd.read_csv("genres_v2.csv")
