In [69]:
# Import libraries
import random
import pandas as pd
from sklearn.model_selection import train_test_split
from recommender import recommender
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import creds  # Import
import requests
from euclidean_recommender import recommender_euclidean
from kmeans_recommender import main
from sklearn.cluster import KMeans

In [70]:
# Import each playlist representing a specific mood
drake_sad = pd.read_csv('playlist_1.csv')
drake_hype = pd.read_csv('playlist_2.csv')
drake_chill = pd.read_csv('playlist_3.csv')
drake_romantic = pd.read_csv('playlist_4.csv')
drake_party = pd.read_csv('playlist_5.csv')

## Steps to Evalutate Recommendation system:
1. Split data into train and test
2. Run the recommendations on each song in the playlist, add recommendations to a recommendation list
3. Check if the recommendations are accurate with the test data
4. Compute metrics 

### Step 1: Splitting the Data

In [71]:
# Define a function to split the dataset into training and testing
def split_dataset(df, train_ratio=0.7):
    num_rows = len(df)
    num_train = int(num_rows * train_ratio)
    
    # Shuffle the DataFrame rows
    shuffled_df = df.sample(frac=1, random_state=42)
    
    # Split into training and testing DataFrames
    train_df = shuffled_df.iloc[:num_train]
    test_df = shuffled_df.iloc[num_train:]
    
    return train_df, test_df

In [72]:
# Splitting data
sad_train, sad_test = split_dataset(drake_sad)
hype_train, hype_test = split_dataset(drake_hype)
chill_train, chill_test = split_dataset(drake_chill)
romantic_train, romantic_test = split_dataset(drake_romantic)
party_train, party_test = split_dataset(drake_party)

In [73]:
#Read in dataset containing all drake songs
drake_df = pd.read_csv('drake_songs_dataset.csv')

#Get desired audio features
selected_features = [
    'danceability', 'energy', 'key', 'loudness',
    'speechiness', 'acousticness', 'instrumentalness', 
    'liveness', 'valence', 'tempo'
]

In [74]:
#Function that scales all data before computing cosine similarity matrix
def scale_data(input_song, drake_df):
    # Making a copy to not alter drake_df
    recommender_dataset = drake_df.copy()
    
    # Removing input song from recommender_dataset so it isn't recommended
    recommender_dataset = recommender_dataset[recommender_dataset['track_uri'] != input_song['track_uri']]

    #Getting only necessary columns before concat
    recommender_dataset = recommender_dataset[selected_features].copy()
    input_song = input_song[selected_features].copy().to_frame().T
    
    #Combining rows for features scaling
    all_features = pd.concat([input_song, recommender_dataset])
    scaler = StandardScaler()
    all_features_scaled = scaler.fit_transform(all_features)

    user_features = all_features_scaled[:1, :].copy()
    dataset_features = all_features_scaled[1:, :].copy()
    
    return user_features, dataset_features
    
        

In [75]:
# Function that runs the recommendation system using cosine similarity
def make_recs_cosine(input_song_index, playlist_df, drake_df):
    #Keeps all columns so that we can extract the recommended song names and artists later
    df_all_cols = drake_df.copy()

    # Get the input song that we will make recommendations from
    input_song = playlist_df.iloc[input_song_index]

    #Remove the user's inputted track from original dataset so it isn't recommended later on
    drake_df =  drake_df[drake_df['track_uri'] != input_song['track_uri']]

    #Scale data
    user_features, dataset_features = scale_data(input_song, drake_df)

    # Recommending system
    return recommender(user_features, dataset_features, df_all_cols, 5)
    

In [76]:
# Function that runs the recommendation system using Euclidean Distance
def make_recs_euclidean(input_song_index, playlist_df, drake_df):
    #Keeps all columns so that we can extract the recommended song names and artists later
    df_all_cols = drake_df.copy()

    # Get the input song that we will make recommendations from
    input_song = playlist_df.iloc[input_song_index]

    #Remove the user's inputted track from original dataset so it isn't recommended later on
    drake_df =  drake_df[drake_df['track_uri'] != input_song['track_uri']]

    #Scale data
    user_features, dataset_features = scale_data(input_song, drake_df)

    # Recommending system
    return recommender_euclidean(user_features, dataset_features, df_all_cols, 5)
    

In [77]:
# Function that runs recommendation system using KMeans
def make_recs_kmeans(input_song_index, df):

    # Scale data
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(df[selected_features])

    # Perform KMeans
    num_clusters = 5
    kmeans = KMeans(n_clusters = num_clusters, random_state = 42, n_init = 10)
    kmeans.fit(scaled_features)

    # Identify the cluster to which the target song belongs
    target_song_features = scaled_features[input_song_index]  # Replace target_song_index with the index of your target song
    target_song_cluster = kmeans.predict([target_song_features])[0]
   
    # Find songs in the same cluster as the target song
    df['cluster'] = kmeans.labels_
    
    songs_in_same_cluster = df[df['cluster'] == target_song_cluster]

    # Now you can recommend songs from the same cluster
    recommended_songs = songs_in_same_cluster.sample(n=5, replace = True)
    return recommended_songs
    

In [78]:
def calculate_accuracy(recommended_songs, test_songs):
    # Convert the recommended and test songs to sets for efficient comparison
    recommended_set = set(recommended_songs)
    test_set = set(test_songs)
    
    # Count the number of correct recommendations (intersection of sets)
    num_correct_recommendations = len(recommended_set & test_set)
    
    # Calculate accuracy
    accuracy = num_correct_recommendations / len(test_set)
    
    return accuracy

def calculate_precision(recommended_songs, test_songs):
    recommended_set = set(recommended_songs)
    test_set = set(test_songs)
    
    true_positives = len(recommended_set & test_set)
    false_positives = len(recommended_set - test_set)
    
    precision = true_positives / (true_positives + false_positives)
    return precision

def calculate_recall(recommended_songs, test_songs):
    recommended_set = set(recommended_songs)
    test_set = set(test_songs)
    
    true_positives = len(recommended_set & test_set)
    false_negatives = len(test_set - recommended_set)
    
    recall = true_positives / (true_positives + false_negatives)
    return recall

def calculate_f1(precision, recall):
    return 2 * (precision * recall) / (precision + recall)



### Steps 2-4: Running the recommendation system and testing performance of Cosine Similiarity recommending system

In [79]:
NUM_PLAYLISTS = 5

# List of playlists (dataframes) to test recommendation performance on
playlist_list = [drake_sad, drake_hype, drake_chill, drake_romantic, drake_party]

#Empty lists to calculate average metrics later
accuracy_sum = 0
precision_sum = 0
recall_sum = 0
f1_sum = 0

# Run the recommendation system on every song in the list for each playlist
for j, playlist in enumerate(playlist_list):
    #Splitting into training and testing data
    training_data, testing_data = split_dataset(playlist)

    # Create empty list to store recommendations
    recommendations = []

    # Running recommendation system
    for i, row in playlist.iterrows():
        recommendations += make_recs_cosine(i, playlist, drake_df)['track_name'].to_list()

    # Outputting accuracy for playlist
    print(f"Playlist {j + 1}")
    print("-------------------")    
    print(f"Accuracy: {round(calculate_accuracy(recommendations, testing_data['track_name']),2 )}\nPrecision: {round(calculate_precision(recommendations, testing_data['track_name']), 2)}\nRecall: {round(calculate_recall(recommendations, testing_data['track_name']), 2)}\nF1 score: {round(calculate_f1(calculate_precision(recommendations, testing_data['track_name']), calculate_recall(recommendations, testing_data['track_name'])), 2)}")
    print("-------------------")    

    #Storing metrics for later
    accuracy_sum += round(calculate_accuracy(recommendations, testing_data['track_name']), 2)
    precision_sum += round(calculate_precision(recommendations, testing_data['track_name']), 2)
    recall_sum += round(calculate_recall(recommendations, testing_data['track_name']), 2)
    f1_sum = round(calculate_f1(calculate_precision(recommendations, testing_data['track_name']), calculate_recall(recommendations, testing_data['track_name'])), 2)

# Aggregate metrics
print(f"Mean Accuracy: {accuracy_sum / NUM_PLAYLISTS}")
print(f"Mean Precision: {precision_sum / NUM_PLAYLISTS}")
print(f"Mean Recall: {recall_sum / NUM_PLAYLISTS}")
print(f"Mean F1 score: {f1_sum / NUM_PLAYLISTS}")

Playlist 1
-------------------
Accuracy: 0.55
Precision: 0.05
Recall: 0.55
F1 score: 0.1
-------------------
Playlist 2
-------------------
Accuracy: 0.56
Precision: 0.08
Recall: 0.56
F1 score: 0.14
-------------------
Playlist 3
-------------------
Accuracy: 0.69
Precision: 0.16
Recall: 0.69
F1 score: 0.27
-------------------
Playlist 4
-------------------
Accuracy: 0.75
Precision: 0.22
Recall: 0.75
F1 score: 0.34
-------------------
Playlist 5
-------------------
Accuracy: 0.74
Precision: 0.26
Recall: 0.74
F1 score: 0.38
-------------------
Mean Accuracy: 0.658
Mean Precision: 0.154
Mean Recall: 0.658
Mean F1 score: 0.076


### Steps 2-4: Running the recommendation system and testing performance of Euclidean Distiance recommending system

In [80]:
NUM_PLAYLISTS = 5

# List of playlists (dataframes) to test recommendation performance on
playlist_list = [drake_sad, drake_hype, drake_chill, drake_romantic, drake_party]

#Empty lists to calculate average metrics later
accuracy_sum = 0
precision_sum = 0
recall_sum = 0
f1_sum = 0

# Run the recommendation system on every song in the list for each playlist
for j, playlist in enumerate(playlist_list):
    #Splitting into training and testing data
    training_data, testing_data = split_dataset(playlist)

    # Create empty list to store recommendations
    recommendations = []

    # Running recommendation system
    for i, row in playlist.iterrows():
        recommendations += make_recs_euclidean(i, playlist, drake_df)['track_name'].to_list()

    # Outputting accuracy for playlist
    print(f"Playlist {j + 1}")
    print("-------------------")    
    print(f"Accuracy: {round(calculate_accuracy(recommendations, testing_data['track_name']),2 )}\nPrecision: {round(calculate_precision(recommendations, testing_data['track_name']), 2)}\nRecall: {round(calculate_recall(recommendations, testing_data['track_name']), 2)}\nF1 score: {round(calculate_f1(calculate_precision(recommendations, testing_data['track_name']), calculate_recall(recommendations, testing_data['track_name'])), 2)}")
    print("-------------------")    

    #Storing metrics for later
    accuracy_sum += round(calculate_accuracy(recommendations, testing_data['track_name']), 2)
    precision_sum += round(calculate_precision(recommendations, testing_data['track_name']), 2)
    recall_sum += round(calculate_recall(recommendations, testing_data['track_name']), 2)
    f1_sum = round(calculate_f1(calculate_precision(recommendations, testing_data['track_name']), calculate_recall(recommendations, testing_data['track_name'])), 2)

# Aggregate metrics
print(f"Mean Accuracy: {accuracy_sum / NUM_PLAYLISTS}")
print(f"Mean Precision: {precision_sum / NUM_PLAYLISTS}")
print(f"Mean Recall: {recall_sum / NUM_PLAYLISTS}")
print(f"Mean F1 score: {f1_sum / NUM_PLAYLISTS}")

Playlist 1
-------------------
Accuracy: 0.64
Precision: 0.06
Recall: 0.64
F1 score: 0.11
-------------------
Playlist 2
-------------------
Accuracy: 0.58
Precision: 0.09
Recall: 0.58
F1 score: 0.15
-------------------
Playlist 3
-------------------
Accuracy: 0.7
Precision: 0.17
Recall: 0.7
F1 score: 0.27
-------------------
Playlist 4
-------------------
Accuracy: 0.77
Precision: 0.23
Recall: 0.77
F1 score: 0.35
-------------------
Playlist 5
-------------------
Accuracy: 0.73
Precision: 0.26
Recall: 0.73
F1 score: 0.38
-------------------
Mean Accuracy: 0.6839999999999999
Mean Precision: 0.162
Mean Recall: 0.6839999999999999
Mean F1 score: 0.076


### Steps 2-4: Running the recommendation system and testing performance of KMeans recommending system

In [81]:
NUM_PLAYLISTS = 5

# List of playlists (dataframes) to test recommendation performance on
playlist_list = [drake_sad, drake_hype, drake_chill, drake_romantic, drake_party]

#Empty lists to calculate average metrics later
accuracy_sum = 0
precision_sum = 0
recall_sum = 0
f1_sum = 0

# Run the recommendation system on every song in the list for each playlist
for j, playlist in enumerate(playlist_list):
    #Splitting into training and testing data
    training_data, testing_data = split_dataset(playlist)

    # Create empty list to store recommendations
    recommendations = []
    
    # Running recommendation system
    for i, row in playlist.iterrows():
        recommendations += make_recs_kmeans(i, playlist)['track_name'].to_list()

    # Outputting accuracy for playlist
    print(f"Playlist {j + 1}")
    print("-------------------")    
    print(f"Accuracy: {round(calculate_accuracy(recommendations, testing_data['track_name']),2 )}\nPrecision: {round(calculate_precision(recommendations, testing_data['track_name']), 2)}\nRecall: {round(calculate_recall(recommendations, testing_data['track_name']), 2)}\nF1 score: {round(calculate_f1(calculate_precision(recommendations, testing_data['track_name']), calculate_recall(recommendations, testing_data['track_name'])), 2)}")
    print("-------------------")    

    #Storing metrics for later
    accuracy_sum += round(calculate_accuracy(recommendations, testing_data['track_name']), 2)
    precision_sum += round(calculate_precision(recommendations, testing_data['track_name']), 2)
    recall_sum += round(calculate_recall(recommendations, testing_data['track_name']), 2)
    f1_sum = round(calculate_f1(calculate_precision(recommendations, testing_data['track_name']), calculate_recall(recommendations, testing_data['track_name'])), 2)

# Aggregate metrics
print(f"Mean Accuracy: {accuracy_sum / NUM_PLAYLISTS}")
print(f"Mean Precision: {precision_sum / NUM_PLAYLISTS}")
print(f"Mean Recall: {recall_sum / NUM_PLAYLISTS}")
print(f"Mean F1 score: {f1_sum / NUM_PLAYLISTS}")b


Playlist 1
-------------------
Accuracy: 1.0
Precision: 0.31
Recall: 1.0
F1 score: 0.47
-------------------
Playlist 2
-------------------
Accuracy: 1.0
Precision: 0.3
Recall: 1.0
F1 score: 0.46
-------------------
Playlist 3
-------------------
Accuracy: 1.0
Precision: 0.35
Recall: 1.0
F1 score: 0.52
-------------------
Playlist 4
-------------------
Accuracy: 1.0
Precision: 0.39
Recall: 1.0
F1 score: 0.56
-------------------
Playlist 5
-------------------
Accuracy: 1.0
Precision: 0.41
Recall: 1.0
F1 score: 0.58
-------------------
Mean Accuracy: 1.0
Mean Precision: 0.352
Mean Recall: 1.0
Mean F1 score: 0.11599999999999999


In [88]:
# Scaling data
data = drake_df[selected_features]
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

#Fitting model
model = KMeans(n_clusters = 5, random_state = 42, n_init = 10)
model.fit(scaled_data)



In [98]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Step 1: Divide "sad song" playlist into input and true recommended song groups
input_group, true_recommended_group = train_test_split(drake_sad[selected_features], test_size=0.3, random_state=42)

# Step 2: Train the KMeans model
scaler = StandardScaler()
scaled_features = scaler.fit_transform(drake_df[selected_features])

kmeans = KMeans(n_clusters=5, random_state=42)
kmeans.fit(scaled_features)

 # Find songs in the same cluster as the target song
drake_df['cluster'] = kmeans.labels_

# Initialize lists to store evaluation metrics
precision_list = []
recall_list = []
f1_list = []




  super()._check_params_vs_input(X, default_n_init=10)


In [119]:
recommendations = []
true_recommended_songs = true_recommended_group['track_name']
for input_song_index, input_song in input_group.iterrows():
    target_song_features = scaled_features[input_song_index]
    target_song_cluster = kmeans.predict([target_song_features])[0]

    songs_in_same_cluster = drake_df[drake_df['cluster'] == target_song_cluster]
    recommendations += songs_in_same_cluster.sample(n=5)['track_name'].tolist()
    

KeyError: 'track_name'

In [122]:
f1_score(recommendations, true_recommended_songs)

ValueError: Found input variables with inconsistent numbers of samples: [0, 18]

In [109]:
true_recommended_songs

['Search & Rescue',
 'Club Paradise',
 'Lose You',
 'Fire & Desire',
 'From Florida With Love',
 'Trust Issues',
 'The Real Her',
 'My Side',
 'Take Care',
 'Can I',
 'Too Much',
 'Teenage Fever',
 'Marvins Room',
 'Chicago Freestyle (feat. Giveon)',
 'Furthest Thing',
 'Losses',
 'Doing It Wrong',
 'Jungle']

In [110]:
set(recommendations)

{'4422',
 '9',
 'A Keeper',
 'Brand New',
 "Bria's Interlude (feat. Omarion)",
 'Broke Boys',
 "Cece's Interlude",
 'Change Locations',
 'Come and See Me (feat. Drake)',
 'Company',
 'Congratulations',
 'Crew Love',
 'D4L',
 'Deep Pockets',
 'Demons (feat. Fivio Foreign & Sosa Geek)',
 'Don’t Matter To Me (with Michael Jackson)',
 'Draft Day',
 'Dreams Money Can Buy',
 'Ela É do Tipo (feat. Drake) [Remix]',
 'Faithful',
 'Final Fantasy',
 'Fire & Desire',
 "Flight's Booked",
 'From Florida With Love',
 'From Time',
 'GREECE (feat. Drake)',
 'Get Along Better',
 'Get It Together',
 'Girls Love Beyoncé (feat. James Fauntleroy)',
 'Girls Want Girls (with Lil Baby)',
 "God's Plan",
 'Gyalchester',
 "Hold On, We're Going Home",
 'Hours In Silence',
 'Houstatlantavegas',
 'How Bout Now',
 'I Get Lonely',
 'I Guess It’s Fuck Me',
 "I'm The Plug",
 'IMY2 (with Kid Cudi)',
 'In My Feelings',
 'Jersey',
 'Jodeci Freestyle (feat. J. Cole)',
 'Jorja Interlude',
 'Jumpman',
 'Jungle',
 'KMT',
 'Kee

In [114]:
len(set(recommendations) & set(true_recommended_songs)) / len(true_recommended_songs)

0.3333333333333333

In [None]:
# Step 6: Calculate aggregate metrics
mean_precision = sum(precision_list) / len(precision_list)
mean_recall = sum(recall_list) / len(recall_list)
mean_f1 = sum(f1_list) / len(f1_list)

# Step 7: Interpret and analyze the results
print(f"Mean Precision: {mean_precision}")
print(f"Mean Recall: {mean_recall}")
print(f"Mean F1-score: {mean_f1}")