In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

def select_song(matching_songs):
    """Resolve the rows matching a song title to a single row label.

    Args:
        matching_songs: DataFrame of candidate rows. Its 'song_name' and
            'artist' columns are read only when more than one row is present
            (to build the selection menu).

    Returns:
        The index label of the chosen row, or None when there are no
        candidates or the user's menu choice is not a valid option number.

    Side effects:
        Prints to stdout and, when several rows match, reads one line from
        stdin via input().
    """
    match_count = matching_songs.shape[0]

    if match_count == 0:
        print("\nThe song was not found in the dataset.")
        return None

    if match_count == 1:
        return matching_songs.index[0]

    print("\nMultiple songs found. Please choose from the following options:")
    options = zip(matching_songs['song_name'], matching_songs['artist'])
    for number, (song, artist) in enumerate(options, start=1):
        print(f"{number}. {song} by {artist}")

    raw_choice = input("\nEnter the number corresponding to your desired song: ")
    try:
        user_choice = int(raw_choice)
    except ValueError:
        # Non-numeric input is treated like an out-of-range number instead of
        # aborting the script with a traceback.
        user_choice = None

    if user_choice is None or not 1 <= user_choice <= match_count:
        print("\nInvalid choice. Please enter a valid number.")
        return None

    # The menu is 1-based; positions in the frame are 0-based.
    return matching_songs.index[user_choice - 1]


# Interactive recommender: ask for a song title, narrow the catalogue to songs
# with comparable popularity, release year and genre, then list the nearest
# neighbours of the chosen song by cosine distance over standardised numeric
# features.
rap_df = pd.read_csv("../data/mainDB.csv")
genres_df = pd.read_csv("../data/genreDB.csv")

scaler = StandardScaler()

# Upper bound on the number of recommendations printed.
max_recommendations = 20

user_input = input("Enter the song name: ")

# Exact title match, ignoring case and surrounding whitespace.
matching_songs = rap_df[
    (rap_df['song_name'].str.strip().str.lower() == user_input.strip().lower())
]

# Row label in rap_df, or None when nothing matched or the menu choice was
# invalid (select_song has already printed the reason in both cases).
selected_song_index = select_song(matching_songs)

if matching_songs.empty:
    print(f"\nThe song '{user_input}' was not found in the dataset.")
elif selected_song_index is not None:
    # select_song returns an index *label*, so every lookup below uses .loc.
    input_song_index = selected_song_index
    input_song_genre1 = rap_df.loc[input_song_index, 'genre_1']
    input_song_genre2 = rap_df.loc[input_song_index, 'genre_2']
    input_song_genre3 = rap_df.loc[input_song_index, 'genre_3']
    input_song_popularity = rap_df.loc[input_song_index, 'popularity']
    input_song_release_year = rap_df.loc[input_song_index, 'release_year']

    known_genres = set(genres_df['Genre'].dropna().str.lower())

    # Stays None whenever no recommendation can be computed, so the reporting
    # section below is skipped instead of failing on undefined names.
    X_scaled_filtered = None

    # A missing genre is read back as NaN (a float), hence the isinstance check.
    if not (isinstance(input_song_genre1, str)
            and input_song_genre1.lower() in known_genres):
        print("\nThe primary genre of the selected song is not in the genre dataset; "
              "no recommendations can be computed.")
    else:
        # A song qualifies on genre when any genre slot equals the input
        # song's genre in that same slot; empty (non-string) slots are ignored.
        genre_mask = pd.Series(False, index=rap_df.index)
        for column, genre in (('genre_1', input_song_genre1),
                              ('genre_2', input_song_genre2),
                              ('genre_3', input_song_genre3)):
            if isinstance(genre, str):
                genre_mask |= rap_df[column].str.lower() == genre.lower()

        # Popularity window: up to 20 points below the input song, never above.
        # Release-year window: within 10 years either side.
        filtered_songs = rap_df[
            rap_df['popularity'].between(input_song_popularity - 20,
                                         input_song_popularity) &
            rap_df['release_year'].between(input_song_release_year - 10,
                                           input_song_release_year + 10) &
            genre_mask
        ]

        numeric_features = (filtered_songs.drop('release_year', axis=1)
                            .select_dtypes(include=['float64', 'int64'])
                            .dropna())

        if input_song_index not in numeric_features.index or len(numeric_features) < 2:
            print("\nNot enough comparable songs with complete feature data to "
                  "compute recommendations.")
        else:
            # Keep the song table aligned row-for-row with the feature matrix;
            # rows with missing feature values were dropped above.
            filtered_songs = filtered_songs.loc[numeric_features.index]
            X_scaled_filtered = pd.DataFrame(scaler.fit_transform(numeric_features),
                                             columns=numeric_features.columns,
                                             index=numeric_features.index)

    if X_scaled_filtered is not None:
        print("\nOriginal Features of the Input Song:")
        print(X_scaled_filtered.loc[input_song_index])

        print("Feature Weights Before Applying:")
        print(X_scaled_filtered.head())

        # Per-feature multipliers applied after scaling; 1.0 leaves a feature
        # unchanged. Columns not listed here are left as scaled.
        feature_weights = {
            'acousticness': 1.0,
            'danceability': 1.0,
            'energy': 1.0,
            'instrumentalness': 1.0,
            'liveness': 1.0,
            'loudness': 1.0,
            'speechiness': 1.0,
            'tempo': 1.0,
            'valence': 1.0,
            #'duration_ms': 0.0,
            'time_signature': 1.0,
            'key': 1.0,
            'mode': 1.0
            }

        for feature, weight in feature_weights.items():
            X_scaled_filtered[feature] *= weight

        print("\nFeature Weights After Applying:")
        print(X_scaled_filtered.head())

        # One extra neighbour because the query song is part of the fitted data
        # and normally comes back as its own nearest neighbour.
        n_neighbors = min(max_recommendations + 1, len(X_scaled_filtered))
        neighbors_model_filtered = NearestNeighbors(n_neighbors=n_neighbors, metric="cosine")

        neighbors_model_filtered.fit(X_scaled_filtered)

        # Query with a one-row DataFrame so column names match the fitted data.
        input_song_features = X_scaled_filtered.loc[[input_song_index]]

        distances, indices = neighbors_model_filtered.kneighbors(input_song_features)

        # Drop the query song from songs and distances with the same mask so
        # each score stays paired with its song even when ties mean the query
        # song is not returned first.
        neighbor_positions = indices[0]
        is_other_song = filtered_songs.index[neighbor_positions] != input_song_index
        similar_songs = filtered_songs.iloc[neighbor_positions][is_other_song]
        similar_songs = similar_songs.iloc[:max_recommendations]
        distances = [distances[0][is_other_song][:max_recommendations]]

        for position, distance in enumerate(distances[0]):
            print(f"{position + 1}. Song Name: {similar_songs['song_name'].iloc[position]}, "
                  f"Song Author: {similar_songs['artist'].iloc[position]}, "
                  f"Release Year: {similar_songs['release_year'].iloc[position]}, "
                  f"Popularity: {similar_songs['popularity'].iloc[position]}, "
                  f"Song Genre: {similar_songs['genre_1'].iloc[position]}, "
                  f"Distance Score: {distance:.2f}")

        num_filtered_songs = len(filtered_songs)
        print(f"\nNumber of songs in the filtered dataset: {num_filtered_songs}")

        input_song_details = rap_df.loc[
            input_song_index, ['song_name', 'artist', 'popularity', 'release_year']
        ]
        print("\nInput Song Details:")
        print(input_song_details.to_string(index=False))





