# House Tyrell's Spotify Genre, Decade, and Song Recommender
This app predicts the genre and release decade of a given track (looked up by track name and artist) using Random Forest Classifier models, and lists Spotify's own song recommendations for that track.
***

In [1]:
# Import the modules
import html
import os
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import display, clear_output, Markdown, HTML
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

# Load the trained models and their fitted scalers.
# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# loading, so only load artifact files that come from a trusted source.
# The paths are relative, so the four .joblib files must be present in the
# notebook's working directory.

# Artifacts used by the decade prediction
rfModel_decade = joblib.load('rfModel.joblib')
scaler_decade = joblib.load('scaler.joblib')

# Artifacts used by the genre prediction
rfModel_genre = joblib.load('rfModel_genre.joblib')
scaler_genre = joblib.load('scaler_genre.joblib')

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import ipywidgets as widgets
from IPython.display import display

In [3]:
# Set up your Spotify API credentials.
# Secrets are read from the environment instead of being written into the
# notebook, so they never end up in version control or in shared copies.
# NOTE(review): any credentials that were previously committed in this cell
# should be treated as leaked and rotated in the Spotify developer dashboard.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
if not client_id or not client_secret:
    # Fail early with an actionable message rather than with an opaque
    # authentication error on the first API call.
    raise RuntimeError(
        "Spotify credentials not found: set the SPOTIPY_CLIENT_ID and "
        "SPOTIPY_CLIENT_SECRET environment variables before running this cell."
    )

# Authenticate with the Spotify API
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Define a function to get track audio features
def get_track_audio_features(artist_name, track_name):
    """Look up a track on Spotify and gather everything the app displays.

    The first hit of an ``artist:... track:...`` search is used.

    Args:
        artist_name: Artist name to search for.
        track_name: Track name to search for.

    Returns:
        ``None`` (after printing a message) when the search returns no tracks
        or when no audio features are available for the matched track.
        Otherwise a 5-tuple
        ``(track_features, actual_year, recommendations, track_url, cover_image_url)``:

        * ``track_features`` - dict of feature name -> value. The insertion
          order is relied upon by ``predict_decade`` and must not change.
        * ``actual_year`` - year part of the album release date, as a string.
        * ``recommendations`` - raw response of ``sp.recommendations`` seeded
          with the matched track (limit 5).
        * ``track_url`` - the track's Spotify URL.
        * ``cover_image_url`` - URL of the first album image, or ``None`` when
          the album has no images.
    """
    # Search for the track
    results = sp.search(q=f"artist:{artist_name} track:{track_name}", type='track')

    # Check if the search results contain any tracks
    matches = results['tracks']['items']
    if len(matches) == 0:
        print(f"No track found for '{artist_name} - {track_name}'")
        return None

    # Use the first track from the search results
    track = matches[0]
    track_id = track['id']

    # Get audio features for the track. The response is a list with one entry
    # for the requested id; that entry may be None when no features exist for
    # the track, which previously crashed on the first subscript below.
    features_response = sp.audio_features(track_id)
    audio = features_response[0] if features_response else None
    if audio is None:
        print(f"No audio features available for '{artist_name} - {track_name}'")
        return None

    # Extract desired features. Keep this key order: predict_decade() feeds
    # the values to its scaler/model in dict iteration order.
    track_features = {
        'Valence': audio['valence'],
        'Acousticness': audio['acousticness'],
        'Danceability': audio['danceability'],
        'Duration (ms)': audio['duration_ms'],
        'Energy': audio['energy'],
        'Explicit': track['explicit'],
        'Instrumentalness': audio['instrumentalness'],
        'Key': audio['key'],
        'Liveness': audio['liveness'],
        'Loudness': audio['loudness'],
        'Mode': audio['mode'],
        'Popularity': track['popularity'],
        'Speechiness': audio['speechiness'],
        'Tempo': audio['tempo'],
        'Time Signature': audio['time_signature'],
    }

    # Get the release date of the track and keep only the year part
    release_date = track['album']['release_date']
    actual_year = release_date.split('-')[0]

    # Get the Spotify song recommendations
    recommendations = sp.recommendations(seed_tracks=[track_id], limit=5)

    # Get the track URL
    track_url = track['external_urls']['spotify']

    # Get the URL of the first album cover image; an album without images
    # yields None instead of raising IndexError.
    album_cover_images = track['album']['images']
    cover_image_url = album_cover_images[0]['url'] if album_cover_images else None

    return track_features, actual_year, recommendations, track_url, cover_image_url


In [4]:
def predict_decade(track_features):
    """Predict the decade of a track from its feature mapping.

    Every entry of ``track_features`` except 'Time Signature' is used, in the
    mapping's iteration order, as one sample row. The row is transformed with
    ``scaler_decade`` and passed to ``rfModel_decade``.

    Returns:
        Whatever ``rfModel_decade.predict`` returns for the single sample.
    """
    # The decade model does not take the time signature as an input
    decade_inputs = [
        value for name, value in track_features.items() if name != 'Time Signature'
    ]

    # Shape the values as a single-row 2D sample and scale it
    sample_row = np.array(decade_inputs).reshape(1, -1)
    scaled_row = scaler_decade.transform(sample_row)

    # Make the prediction
    return rfModel_decade.predict(scaled_row)


In [5]:
def predict_genre(track_features):
    """Predict the genre name of a track from its feature mapping.

    The fourteen features listed below are read from ``track_features`` in
    that fixed order, scaled with ``scaler_genre`` and classified with
    ``rfModel_genre``. The first predicted class value is then translated to
    a genre name.

    Returns:
        The genre name (str) for the predicted class.
    """
    # Column order expected by the genre scaler/model
    genre_feature_order = (
        'Popularity', 'Danceability', 'Energy', 'Key', 'Loudness', 'Mode',
        'Speechiness', 'Acousticness', 'Instrumentalness', 'Liveness',
        'Valence', 'Tempo', 'Duration (ms)', 'Time Signature',
    )

    # Build a single-row 2D sample and scale it
    sample_row = np.array(
        [track_features[name] for name in genre_feature_order]
    ).reshape(1, -1)
    scaled_row = scaler_genre.transform(sample_row)

    # Make the prediction
    predicted_classes = rfModel_genre.predict(scaled_row)

    # Class value -> genre name (values 0..10 in this order)
    genre_names = [
        'Acoustic/Folk',
        'Alt_Music',
        'Blues',
        'Bollywood',
        'Country',
        'HipHop',
        'Indie Alt',
        'Instrumental',
        'Metal',
        'Pop',
        'Rock',
    ]
    genre_by_class = dict(enumerate(genre_names))

    return genre_by_class[predicted_classes[0]]



In [6]:
def plot_track_spider_data(artist_name, track_name):
    """Draw a radar ("spider") chart of a track's features.

    The track is looked up with ``get_track_audio_features``; each feature is
    rescaled (see the inline notes) so that all of them fit on one radial
    axis, then plotted on a polar axes and shown with ``plt.show()``.

    Args:
        artist_name: Artist name used for the lookup.
        track_name: Track name used for the lookup.

    Returns:
        None. Nothing is drawn when the lookup fails.
    """
    # The lookup returns None (not a tuple) on failure, so it must be checked
    # before unpacking; unpacking first raised TypeError and made the guard
    # below unreachable.
    lookup = get_track_audio_features(artist_name, track_name)
    if lookup is None:
        return None

    track_features = lookup[0]
    if track_features is None:
        return None

    # Extract categories and values from track features
    categories = ['Valence', 'Acousticness', 'Danceability', 'Energy', 'Instrumentalness',
                  'Liveness', 'Loudness', 'Speechiness',
                  'Key', 'Tempo', 'Duration (min)', 'Popularity', 'Mode', 'Time Signature']
    values = [
        # These features are multiplied by 10
        track_features['Valence']*10, track_features['Acousticness']*10, track_features['Danceability']*10,
        track_features['Energy']*10, track_features['Instrumentalness']*10, track_features['Liveness']*10,
        # Loudness is divided by -10, which flips its sign
        track_features['Loudness']/-10, track_features['Speechiness']*10,
        # Key is plotted as-is; tempo is divided by 10; duration is converted from ms to minutes
        track_features['Key'], track_features['Tempo']/10, track_features['Duration (ms)']/1000/60,
        track_features['Popularity']/10, track_features['Mode']*10, track_features['Time Signature'],
    ]

    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    angles += angles[:1]  # Append the starting angle to close the plot.
    closed_values = values + values[:1]  # Repeat the first value for the same reason.

    fig, ax = plt.subplots(subplot_kw={'polar': True})

    ax.plot(angles, closed_values, linewidth=1, linestyle='solid', marker='o')
    ax.fill(angles, closed_values, alpha=0.25)

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories)

    plt.show()



In [7]:
# Use HTML Markdown to style the headers
def styled_header(header_text):
    """Return an HTML snippet showing ``header_text`` as a section header.

    The text is placed in a white, underlined ``<h2>`` inside a gray ``<div>``.
    It is inserted as-is (no escaping).
    """
    markup_lines = (
        '',
        '    <div style="background-color: gray; padding: 6px; font-family: Arial; text-align: left;">',
        f'        <h2 style="color: white; text-decoration: underline;">{header_text}</h2>',
        '    </div>',
        '    ',
    )
    return '\n'.join(markup_lines)

def styled_paragraph(paragraph_text):
    """Return an HTML snippet showing ``paragraph_text`` as a styled paragraph.

    The text is rendered in blue italic 20px Arial. Callers build the text
    from user-typed track/artist names and from names returned by Spotify, so
    it is HTML-escaped before insertion: markup characters in those names are
    displayed literally instead of being interpreted by the browser.

    Args:
        paragraph_text: Plain text to display (converted with ``str``).

    Returns:
        The HTML snippet as a string.
    """
    # quote=False: the text goes into element content, not an attribute, so
    # quotes can stay readable in the generated markup.
    safe_text = html.escape(str(paragraph_text), quote=False)
    return '''
    <div style="font-family: Arial; color: blue; font-style: italic; font-size: 20px;">
        <p>{}</p>
    </div>
    '''.format(safe_text)


In [8]:
# Create text input widgets
track_name_widget = widgets.Text(description='Track Name:')
artist_name_widget = widgets.Text(description='Artist Name:')

# Create button widget
button = widgets.Button(description='FIND MY SONG!')

# Create image widget
image_widget = widgets.Image()

# Modify the button style
button.style.button_color = 'lightblue'
button.layout.width = '200px'
button.layout.height = '50px'
# Font size is a button *style* property, not a layout one: assigning it on
# button.layout only created an unused attribute and had no visible effect.
# NOTE(review): font_size on the button style needs a recent ipywidgets
# release; on older versions this line is a harmless no-op, as before.
button.style.font_size = '20px'

# Output area that receives everything the click handler displays
output = widgets.Output()

# Define function to handle button click event
def on_button_click(b):
    """Handle a click on the search button and render the results.

    Reads the track and artist names from the text widgets, looks the track
    up on Spotify, and writes into ``output``: an audio element, the predicted
    decade next to the actual release year, the predicted genre, Spotify's
    recommendations, the feature radar chart and a glossary of the features.

    Args:
        b: The clicked button (passed by ipywidgets; not used).
    """
    track_name = track_name_widget.value
    artist_name = artist_name_widget.value

    with output:
        clear_output(wait=True)

        # The lookup returns None when it cannot resolve the track (it prints
        # its own message in that case). Unpacking None raised TypeError, so
        # stop here instead.
        lookup = get_track_audio_features(artist_name, track_name)
        if lookup is None:
            return
        track_features, actual_year, recommendations, track_url, _cover_image_url = lookup

        predicted_decade = predict_decade(track_features)
        predicted_genre = predict_genre(track_features)

        print("")

        # NOTE(review): track_url is the track's Spotify page URL rather than
        # an audio file, so this player presumably cannot play it - confirm
        # and consider a preview URL or a plain link.
        audio_element = f'<audio src="{track_url}" controls></audio>'
        display(HTML(audio_element))

        display(Markdown('---'))  # Display horizontal line
        predicted_decade_header = styled_header("Predicted Decade (using House Tyrell ML Engine)")
        predicted_decade_line1 = styled_paragraph(f"Predicted decade for '{track_name}' by {artist_name}: {predicted_decade[0]}")
        predicted_decade_line2 = styled_paragraph(f"Actual year for '{track_name}' by {artist_name}: {actual_year}")

        display(HTML(predicted_decade_header))
        display(HTML(predicted_decade_line1))
        display(HTML(predicted_decade_line2))

        display(Markdown('---'))  # Display horizontal line
        predicted_genre_header = styled_header("Predicted Genre (using House Tyrell ML Engine)")
        predicted_genre_line = styled_paragraph(f"Predicted genre for '{track_name}' by {artist_name}: {predicted_genre}")

        display(HTML(predicted_genre_header))
        display(HTML(predicted_genre_line))

        display(Markdown('---'))  # Display horizontal line
        song_recs_header = styled_header("Spotify Song Recommendations (using Spotify ML Engine)")
        display(HTML(song_recs_header))
        for track in recommendations['tracks']:
            song_recs_line = styled_paragraph(f"Recommended Track: {track['artists'][0]['name']} - {track['name']}")
            display(HTML(song_recs_line))

        display(Markdown('---'))  # Display horizontal line
        feature_plot_header = styled_header("Track Features")
        display(HTML(feature_plot_header))
        # NOTE: this performs a second Spotify lookup for the same track.
        plot_track_spider_data(artist_name, track_name)

        # Glossary printed under the chart; each entry is preceded by a blank line.
        feature_glossary = (
            "Danceability: describes how suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity. A value of 0.0 is least danceable and 1.0 is most danceable.",
            "Acousticness: A confidence measure from 0.0 to 1.0 of whether the track is acoustic. 1.0 represents high confidence the track is acoustic.",
            "Valence: A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track. Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry).",
            "Speechiness: Detects the presence of spoken words in a track. The more exclusively speech-like the recording (e.g. talk show, audio book, poetry), the closer to 1.0 the attribute value. Values above 0.66 describe tracks that are probably made entirely of spoken words. Values between 0.33 and 0.66 describe tracks that may contain both music and speech, either in sections or layered, including such cases as rap music. Values below 0.33 most likely represent music and other non-speech-like tracks.",
            "Loudness: The overall loudness of a track in decibels (dB * 10). Loudness values are averaged across the entire track and are useful for comparing relative loudness of tracks. Loudness is the quality of a sound that is the primary psychological correlate of physical strength (amplitude). Values typically range between 0 and 60 db.",
            "Liveness: Detects the presence of an audience in the recording. Higher liveness values represent an increased probability that the track was performed live. A value above 0.8 provides strong likelihood that the track is live.",
            "Instrumentalness: Predicts whether a track contains no vocals. 'Ooh' and 'aah' sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly 'vocal'. The closer the instrumentalness value is to 1.0, the greater likelihood the track contains no vocal content. Values above 0.5 are intended to represent instrumental tracks, but confidence is higher as the value approaches 1.0.",
            "Energy: A measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale. Perceptual features contributing to this attribute include dynamic range, perceived loudness, timbre, onset rate, and general entropy.",
            "Popularity: The popularity of the track. The value will be between 0 and 100, with 100 being the most popular.",
            "Key: The key the track is in. Integers map to pitches using standard Pitch Class notation. E.g. 0 = C, 1 = C-sharp/D-flat, 2 = D, and so on. If no key was detected, the value is -1.",
            "Tempo: The overall estimated tempo of a track in beats per minute (BPM). In musical terminology, tempo is the speed or pace of a given piece and derives directly from the average beat duration.",
            "Mode: Mode indicates the modality (major or minor) of a track, the type of scale from which its melodic content is derived. Major is represented by 1 and minor is 0.",
            "Time Signature: An estimated time signature. The time signature (meter) is a notational convention to specify how many beats are in each bar (or measure). The time signature ranges from 3 to 7 indicating time signatures of '3/4', to '7/4'.",
        )
        for glossary_entry in feature_glossary:
            print("")
            print(glossary_entry)
        
# Attach button click event handler
button.on_click(on_button_click)

#info = Markdown("""# SPOTIFY DECADE PREDICTOR""")
# Stack the two text inputs, the button and the output area vertically and
# render the resulting container as this cell's output.
makeDisplay = widgets.VBox([track_name_widget, artist_name_widget,  button, output])
display(makeDisplay)

VBox(children=(Text(value='', description='Track Name:'), Text(value='', description='Artist Name:'), Button(d…