# Final Part - Building a song recommender


In [37]:
 #importing libraries
import pandas as pd
from pandas import json_normalize
import spotipy 
from spotipy.oauth2 import SpotifyClientCredentials
import pickle
import joblib
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore", message="X has feature names, but KMeans was fitted without feature names")


In [38]:
# Load the two reference tables the recommender works from:
#   top100_df        - the hot-songs list (its `title` and `artist` columns are read later)
#   larger_set_songs - the bigger song catalogue (its `cluster`, `song_name` and `name`
#                      columns are read later)
top100_df, larger_set_songs = (
    pd.read_csv(csv_name)
    for csv_name in ('top_hot_songs.csv', 'clustered_songs.csv')
)

In [39]:
import numpy as np
# Load the Spotify API credentials and create the API client.
#
# secrets.txt is read as one "key:value" pair per line; the two keys used below
# are `clientid` and `clientsecret`.
secrets_dict = {}
with open("secrets.txt", "r") as secrets_file:  # context manager closes the handle for us
    for line in secrets_file:
        # Split on the FIRST colon only, so a value that itself contains ':' is kept whole.
        key, separator, value = line.partition(':')
        if not separator:
            continue  # blank line, or a line that is not a "key:value" pair
        secrets_dict[key.strip()] = value.strip()

# Set up the Spotify API client (client-credentials flow: no user login involved).
# A missing key raises KeyError here, which points straight at the malformed secrets file.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret']))

# Greet the user, then ask for the song and the artist (in that order).
# Both answers are trimmed and lower-cased so later comparisons are case-insensitive.
print("Hey, want some music recommendations? I'm here to help!")
song, artist = (
    input(prompt).strip().lower()
    for prompt in ("Enter a song title: ", "Enter the name of the artist: ")
)

# Look up the requested song on Spotify and print one recommendation.
#
#   * If the (title, artist) pair is a row of the hot-songs table `top100_df`,
#     recommend another randomly chosen hot song.
#   * Otherwise fetch the track's audio features, scale them, predict the track's
#     cluster with the saved model, and recommend a random song from that cluster
#     of `larger_set_songs`.
#
# Reads `sp`, `song`, `artist`, `top100_df` and `larger_set_songs` from earlier cells.


def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    The file is opened in a context manager so the handle is closed once the
    object has been read.
    """
    # SECURITY: unpickling can execute arbitrary code - only load model files
    # that were produced by this project.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


def _similar_song(track_id, track_popularity):
    """Pick a random song from the cluster predicted for the given track.

    Parameters
    ----------
    track_id : str
        Spotify id of the track the user asked about.
    track_popularity : pandas.Series
        One-element Series, indexed from 0, holding the popularity of that same track.

    Returns
    -------
    pandas.DataFrame or None
        A one-row sample of `larger_set_songs` taken from the predicted cluster,
        or None when no recommendation can be produced (no audio features,
        missing feature values, or no catalogued song in the cluster).
    """
    audio_features_list = sp.audio_features(track_id)
    # NOTE(review): spotipy is expected to return [None] for tracks without audio
    # features - confirm against the spotipy documentation.
    if not audio_features_list or audio_features_list[0] is None:
        return None
    audio_features_df = json_normalize(audio_features_list[:1])

    # Both inputs are single rows indexed 0, so the popularity lines up with the
    # audio features of the SAME track.
    popularity_features_df = pd.concat([track_popularity, audio_features_df], axis=1)

    # Keep only the numerical columns.
    # NOTE(review): assumes these are the columns (and order) the scaler was fitted on.
    numerical_features = popularity_features_df.select_dtypes(include=[np.number])

    # With a single row there is nothing to impute a missing value from (the
    # column mean would itself be NaN), so give up instead of feeding NaN to the models.
    if numerical_features.isnull().any().any():
        return None

    # Scale, then predict the cluster of the user's song.
    scale = _load_pickle('scaler_model.pkl')
    data_scaled = pd.DataFrame(scale.transform(numerical_features),
                               columns=numerical_features.columns)
    model = _load_pickle('clustering_model.pkl')
    user_cluster = model.predict(data_scaled)[0]

    # Sample one song from the same cluster; .sample() raises on an empty frame.
    songs_cluster = larger_set_songs[larger_set_songs["cluster"] == user_cluster]
    if songs_cluster.empty:
        return None
    return songs_cluster.sample()


# Normalise the query once; harmless if the input cell already did so.
query_song = song.strip().lower()
query_artist = artist.strip().lower()

# Ask for several results so an exact title/artist match is more likely to be among them.
user_result = sp.search(query_artist + " " + query_song, type="track", limit=5)
tracks = json_normalize(user_result["tracks"]["items"])

# Find the first result whose title and first-listed artist both equal the query.
matched_index = None
for index, track in tracks.iterrows():
    searched_song = track['name'].lower()
    searched_artist = track['artists'][0]['name'].lower()
    if searched_song == query_song and searched_artist == query_artist:
        matched_index = index
        break  # first exact match wins
match_found = matched_index is not None

if tracks.empty:  # the search returned nothing at all
    print("Sorry, no search results found for the entered song.")
elif not match_found:  # results came back, but none matched title AND artist
    print("Sorry, the entered song could not be found or does not match the artist.")
else:
    # Title and artist must match on the SAME row of the hot list; checking the two
    # columns independently would accept a title from one row and an artist from another.
    same_song_rows = (
        (top100_df['title'].str.lower() == query_song)
        & (top100_df['artist'].str.lower() == query_artist)
    )
    if same_song_rows.any():
        # Recommend a different hot song when there is one to choose from.
        hot_candidates = top100_df[~same_song_rows]
        if hot_candidates.empty:
            hot_candidates = top100_df
        recommended = hot_candidates.sample()
        print("Check out this hot song: ")
        print(recommended['title'].values[0], ' by ', recommended['artist'].values[0])
    else:
        recommended = _similar_song(
            tracks.loc[matched_index, 'id'],
            tracks.loc[[matched_index], "popularity"].reset_index(drop=True),
        )
        if recommended is None:
            print("Sorry, I couldn't find a similar song to recommend for this track.")
        else:
            print("You should check out:")
            print(recommended['song_name'].values[0], ' by ', recommended['name'].values[0])


Hey, want some music recommendations? I'm here to help!


Enter a song title:  gangnam style
Enter the name of the artist:  psy


You should check out:
My Shot  by  Anthony Ramos
