<a href="https://colab.research.google.com/github/sonalibasu/music-recommendation-spotify/blob/main/extract_process_predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **IMT 575: Spotify Music Mood Classification**

In [None]:
# Optional Code Block: Execute to install required packages
!pip install spotipy
!pip install getpass

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[31mERROR: Could not find a version that satisfies the requirement getpass (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for getpass[0m[31m
[0m

In [None]:
import pandas as pd
import numpy as np
import spotipy
import getpass
from spotipy.oauth2 import SpotifyClientCredentials
# Authentication with app-level client credentials (no user login involved).
# getpass keeps both values out of the notebook source and of the echoed output.
# .strip() removes whitespace that is easily picked up when pasting the values.
cid = getpass.getpass(prompt="Enter Spotify Developer Client ID: ").strip()
secret = getpass.getpass(prompt="Enter Spotify Developer Client Secret: ").strip()

# `sp` is the shared API client used by every function further down.
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

Enter Spotify Developer Client Credentials··········
Enter Spotify Developer Client Secret··········


In [None]:
# Source 2: Get Songs From Mood Playlists
def get_playlist_track_uris(playlists:list) -> list:
    """
    Collect the track URIs of every track in the given playlists.

    Args:
        playlists: Playlist identifiers passed unchanged to ``sp.playlist_tracks``.

    Returns:
        A flat list of track URIs, in the order the playlists return them.
        Playlist entries whose ``track`` object is missing are skipped.
    """
    track_uri = []
    for playlist in playlists:
        page = sp.playlist_tracks(playlist)
        # The endpoint is paginated: keep following "next" so playlists longer
        # than one page are not silently truncated.
        while page:
            track_uri.extend(item["track"]["uri"] for item in page["items"]
                             if item.get("track"))
            page = sp.next(page) if page.get("next") else None
    return track_uri

def extract_song_information_from_playlist(playlists:list, mood) -> pd.DataFrame:
    """
    Build a DataFrame of song details and audio features for the tracks of one
    or more playlists, labelling every row with ``mood``.

    Args:
        playlists: Playlist identifiers passed unchanged to ``sp.playlist_tracks``.
        mood: Value stored in the ``mood`` column of every returned row.

    Returns:
        One row per usable playlist entry with the columns ``mood``,
        ``track_id``, ``artists`` (first credited artist only), ``album_name``,
        ``track_name``, ``popularity``, ``duration_ms``, ``explicit``, the audio
        features named in ``audio_features`` below, and ``track_genre`` (first
        genre of the first artist, or None when that artist has no genres).
        Entries with no track object or no audio features are skipped.
    """
    # Fields kept from the audio-features response; everything else is dropped.
    audio_features = ('danceability','energy','key','loudness','mode','speechiness',
                      'acousticness','instrumentalness','liveness','valence','tempo')
    # Artist URI -> top genre, so each artist is requested only once.
    genre_by_artist = {}
    song_list = []
    for playlist_uri in playlists:
        page = sp.playlist_tracks(playlist_uri)
        # The endpoint is paginated: keep following "next" so playlists longer
        # than one page are not silently truncated.
        while page:
            for item in page["items"]:
                track = item.get("track")
                if not track:
                    # The entry carries no track object; nothing to extract.
                    continue
                track_id = track["uri"].split(':')[-1]
                features = sp.audio_features(track_id)
                song_metrics = features[0] if features else None
                if song_metrics is None:
                    # Without audio features the row is unusable for modelling.
                    continue
                first_artist = track["artists"][0]
                # Song & Artist information, popularity
                song = {
                    'mood': mood,
                    'track_id': track_id,
                    'artists': first_artist["name"],
                    'album_name': track["album"]["name"],
                    'track_name': track["name"],
                    'popularity': track["popularity"],
                    'duration_ms': track["duration_ms"],
                    'explicit': track["explicit"],
                }
                for feature, value in song_metrics.items():
                    if feature in audio_features:
                        song[feature] = value
                # Artist API endpoint for genre; pick the single top genre.
                artist_uri = first_artist["uri"]
                if artist_uri not in genre_by_artist:
                    genres = sp.artist(artist_uri)["genres"]
                    genre_by_artist[artist_uri] = genres[0] if genres else None
                song['track_genre'] = genre_by_artist[artist_uri]
                song_list.append(song)
            page = sp.next(page) if page.get("next") else None
    # List of dicts -> pd df
    df = pd.DataFrame(song_list)
    return df


# Extracting songs from mood playlists labelled "sad":  
1) "Sad Songs - Songs for a broken heart"  
URL: https://open.spotify.com/playlist/37i9dQZF1DX7qK8ma5wgG1  
2) "Life Sucks - Having a bad day? We know how it feels!"  
URL: https://open.spotify.com/playlist/37i9dQZF1DX3YSRoSdA634  
3) "Sad Hour - Somehow heartbreak feels good in a place like this"  
URL: https://open.spotify.com/playlist/37i9dQZF1DWSqBruwoIXkA  

In [None]:
%%time
### SAD LABEL
# Playlist IDs used as the source of songs for the 'sad' mood.
playlists = [
    '37i9dQZF1DX7qK8ma5wgG1',
    '37i9dQZF1DWSqBruwoIXkA',
    '37i9dQZF1DX3YSRoSdA634',
]
# Fetch every song with its features and report the raw size ...
sad_df = extract_song_information_from_playlist(playlists, 'sad')
print(f"sad_df shape = {sad_df.shape} songs.")
# ... then drop rows that are exact duplicates and report what is left.
sad_df = sad_df.drop_duplicates()
print(f"Final no. of songs with mood label 'sad' after deduplication = {sad_df.shape[0]} songs.")

sad_df shape = (280, 20) songs.
Final no. of songs with mood label 'sad' after deduplication = 240 songs.
CPU times: user 1.79 s, sys: 170 ms, total: 1.96 s
Wall time: 36.6 s


In [None]:
%%time
### HAPPY LABEL
# Playlist IDs used as the source of songs for the 'happy' mood.
playlists = ['37i9dQZF1DWYBO1MoTDhZI','37i9dQZF1DXdPec7aLTmlC', '37i9dQZF1DX7KNKjOK0o75']
happy_df = extract_song_information_from_playlist(playlists,'happy')
# The label names happy_df: this line previously printed "sad_df shape".
print(f"happy_df shape = {happy_df.shape} songs.")
# Drop rows that are exact duplicates before reporting the final count.
happy_df = happy_df.drop_duplicates()
print(f"Final no. of songs with mood label 'happy' after deduplication = {happy_df.shape[0]} songs.")

sad_df shape = (260, 20) songs.
Final no. of songs with mood label 'happy' after deduplication = 250 songs.
CPU times: user 1.55 s, sys: 155 ms, total: 1.7 s
Wall time: 50.2 s


In [None]:
%%time
### LOVE LABEL
# Playlist IDs used as the source of songs for the 'love' mood.
playlists = [
    '37i9dQZF1DX5IDTimEWoTd',
    '37i9dQZF1DXbEm2sKzgoJ8',
    '37i9dQZF1DX6mvEU1S6INL',
]
# Fetch every song with its features and report the raw size ...
love_df = extract_song_information_from_playlist(playlists, 'love')
print(f"love_df shape = {love_df.shape} songs.")
# ... then drop rows that are exact duplicates and report what is left.
love_df = love_df.drop_duplicates()
print(f"Final no. of songs with mood label 'love' after deduplication = {love_df.shape[0]} songs.")

love_df shape = (300, 20) songs.
Final no. of songs with mood label 'love' after deduplication = 295 songs.
CPU times: user 1.79 s, sys: 151 ms, total: 1.94 s
Wall time: 59.5 s


In [None]:
%%time
### PARTY LABEL
# Playlist IDs used as the source of songs for the 'party' mood.
playlists = [
    '37i9dQZF1DXa2PvUpywmrr',
    '37i9dQZF1DXaXB8fQg7xif',
    '37i9dQZF1DX5I05jXm1F2M',
]
# Fetch every song with its features and report the raw size ...
party_df = extract_song_information_from_playlist(playlists, 'party')
print(f"party_df shape = {party_df.shape} songs.")
# ... then drop rows that are exact duplicates and report what is left.
party_df = party_df.drop_duplicates()
print(f"Final no. of songs with mood label 'party' after deduplication = {party_df.shape[0]} songs.")

party_df shape = (290, 20) songs.
Final no. of songs with mood label 'party' after deduplication = 279 songs.
CPU times: user 1.65 s, sys: 172 ms, total: 1.83 s
Wall time: 48.2 s


In [None]:
%%time
### CALM LABEL
# Playlist IDs used as the source of songs for the 'calm' mood.
playlists = [
    '37i9dQZF1DX889U0CL85jj',
    '37i9dQZF1DX6QClArDhvcW',
    '37i9dQZF1DX6ziVCJnEm59',
]
# Fetch every song with its features and report the raw size ...
calm_df = extract_song_information_from_playlist(playlists, 'calm')
print(f"calm_df shape = {calm_df.shape} songs.")
# ... then drop rows that are exact duplicates and report what is left.
calm_df = calm_df.drop_duplicates()
print(f"Final no. of songs with mood label 'calm' after deduplication = {calm_df.shape[0]} songs.")

calm_df shape = (300, 20) songs.
Final no. of songs with mood label 'calm' after deduplication = 300 songs.
CPU times: user 1.74 s, sys: 180 ms, total: 1.92 s
Wall time: 51.2 s


In [None]:
# Stack the five per-mood frames into the single frame used for training.
# ignore_index=True gives the result a fresh 0..n-1 index instead of carrying
# over each source frame's own (overlapping) row labels.
df = pd.concat([sad_df, happy_df, love_df, party_df, calm_df], ignore_index=True)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Data preprocessing
df.dropna(subset=['track_genre'], inplace=True)  # Remove rows with missing track_genre
df.reset_index(drop=True, inplace=True)

# Raw inputs: a free-text field (artist + genre), the numeric columns, and the mood label.
text_data = df['artists'] + ' ' + df['track_genre']
X_numerical = df[['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                  'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'explicit']]
y = df['mood']
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split row positions BEFORE fitting any transformer, so the vocabulary and the
# scaling statistics are learned from training rows only and nothing about the
# test rows leaks into the features. With the same sample count, test_size and
# random_state, the same rows land in each split as when X itself was split.
train_idx, test_idx, y_train, y_test = train_test_split(
    np.arange(len(df)), y_encoded, test_size=0.2, random_state=24)

# Bag of words over artist + genre, fitted on the training rows.
vectorizer = CountVectorizer()
vectorizer.fit(text_data.iloc[train_idx])
X_text = vectorizer.transform(text_data)

# Standardisation of the numeric columns, fitted on the training rows.
scaler = StandardScaler()
scaler.fit(X_numerical.iloc[train_idx])
X_numerical_scaled = scaler.transform(X_numerical)

# Full feature matrix (text counts followed by scaled numerics), then the split.
X = np.concatenate((X_text.toarray(), X_numerical_scaled), axis=1)
X_train, X_test = X[train_idx], X[test_idx]

# Model training and evaluation
classifier = MLPClassifier(hidden_layer_sizes=(100, 50), activation='relu',
                           max_iter=1000, random_state=24, alpha=1)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.5809128630705395


In [None]:
def get_song_metrics(track_id):
    """
    Fetch the audio features and basic metadata of one track as a one-row DataFrame.

    Args:
        track_id: Track identifier passed unchanged to ``sp.audio_features``
            and ``sp.track``.

    Returns:
        A DataFrame with a single row (index 0) holding the audio-features
        fields plus ``name``, ``artists`` (first credited artist), ``explicit``
        and ``track_genre`` (first genre of that artist, or '' if it has none).

    Raises:
        ValueError: If no audio features are returned for the track.
    """
    features = sp.audio_features(track_id)
    if not features or features[0] is None:
        raise ValueError(f"No audio features available for track {track_id!r}")
    # Work on a copy so the API response object itself is not modified.
    song_metrics = dict(features[0])
    # Get the song name from the track object
    track = sp.track(track_id)
    song_metrics['name'] = track['name']
    # One artist lookup supplies both the artist name and the genres.
    artist = sp.artist(track['artists'][0]['uri'])
    song_metrics['artists'] = artist['name']
    song_metrics['explicit'] = track["explicit"]
    # Get track genre; '' keeps the later "artists + ' ' + track_genre" text
    # feature a plain string when the artist has no genres listed.
    genres = artist["genres"]
    song_metrics['track_genre'] = genres[0] if genres else ''
    df = pd.DataFrame(song_metrics, index=[0])
    return df

def get_song_recommendations(song_metrics):
    """
    Predict the mood of a song and print up to five random songs with that mood.

    Relies on the notebook-level ``vectorizer``, ``scaler``, ``classifier`` and
    ``label_encoder`` objects, and on a notebook-level DataFrame named
    ``<mood>_df`` for the predicted mood.

    Args:
        song_metrics: DataFrame whose first row describes the song; it must
            provide ``name``, ``artists``, ``track_genre`` and the numeric
            columns selected below. The caller's frame is not modified.

    Returns:
        None. The prediction and the recommendations are printed.

    Raises:
        NameError: If no ``<mood>_df`` frame exists for the predicted mood.
    """
    # Work on a re-indexed copy so that label 0 is guaranteed to be the first row.
    song_metrics = song_metrics.reset_index(drop=True)
    print(f"Generating the mood label for '{song_metrics['name'][0]}' by {song_metrics['artists'][0]}")
    # A missing genre becomes '' so the text feature stays a plain string.
    text_data = song_metrics['artists'] + ' ' + song_metrics['track_genre'].fillna('')
    #Generating bag of words
    X_text = vectorizer.transform(text_data)
    X_numerical = song_metrics[['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                                'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'explicit']]
    X_numerical_scaled = scaler.transform(X_numerical)
    X = np.concatenate((X_text.toarray(), X_numerical_scaled), axis=1)
    y_pred = classifier.predict(X)
    predicted_mood = label_encoder.inverse_transform(y_pred)[0]
    print(f"Given that you're in the mood for {predicted_mood} songs, recommending similar songs:")
    # Look the per-mood frame up by name instead of eval()-ing a built string.
    pool_name = f"{predicted_mood}_df"
    pool = globals().get(pool_name)
    if pool is None:
        raise NameError(f"name '{pool_name}' is not defined")
    # Never ask for more rows than the pool holds (sample() would raise).
    random_rows = pool.sample(n=min(5, len(pool)))
    for index, row in random_rows.iterrows():
        print(f"{row['track_name']} by {row['artists']}" )


In [None]:
# Recommendation
# Strip surrounding whitespace so a copy-pasted URI is looked up exactly as intended.
track_id = input("Enter the URI for song you're currently listening to!\n").strip()
song_metrics = get_song_metrics(track_id)
get_song_recommendations(song_metrics)
