In [218]:
# Import libraries
import random
import pandas as pd
from sklearn.model_selection import train_test_split
from recommender import recommender
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import creds  # Import
import requests
from euclidean_recommender import recommender_euclidean
from kmeans_recommender import main
from sklearn.cluster import KMeans

In [219]:
# Compute the cosine similarity between the input song, and the recommended songs

# Compute the cosine similarity between the input song, and songs in the playlist that the recommendation system didn't see

# Calculate the average cosine similarity between the input song and other songs in the playlist that the recommendation system didn't see

# Compare the average cosine similarties

Make sure each recommendation system can generate recs

In [220]:
# Import each playlist representing a specific mood
drake_sad = pd.read_csv('playlist_1.csv')
drake_hype = pd.read_csv('playlist_2.csv')
drake_chill = pd.read_csv('playlist_3.csv')
drake_romantic = pd.read_csv('playlist_4.csv')
drake_party = pd.read_csv('playlist_5.csv')

In [221]:
playlist_list = [drake_sad, drake_hype, drake_chill, drake_romantic, drake_party]

In [222]:
#Read in dataset containing all drake songs
drake_df = pd.read_csv('drake_songs_dataset.csv')

#Get desired audio features
selected_features = [
    'danceability', 'energy', 'key', 'loudness',
    'speechiness', 'acousticness', 'instrumentalness', 
    'liveness', 'valence', 'tempo'
]

In [223]:
#Function that scales all data before computing cosine similarity matrix
def scale_data(input_song, drake_df):
    # Making a copy to not alter drake_df
    recommender_dataset = drake_df.copy()
    
    # Removing input song from recommender_dataset so it isn't recommended
    recommender_dataset = recommender_dataset[recommender_dataset['track_uri'] != input_song['track_uri']]

    #Getting only necessary columns before concat
    recommender_dataset = recommender_dataset[selected_features].copy()
    input_song = input_song[selected_features].copy().to_frame().T
    
    #Combining rows for features scaling
    all_features = pd.concat([input_song, recommender_dataset])
    scaler = StandardScaler()
    all_features_scaled = scaler.fit_transform(all_features)

    user_features = all_features_scaled[:1, :].copy()
    dataset_features = all_features_scaled[1:, :].copy()
    
    return user_features, dataset_features
    
        

In [224]:
# Function that runs the recommendation system using cosine similarity
def make_recs_cosine(input_song_index, playlist_df, drake_df):
    #Keeps all columns so that we can extract the recommended song names and artists later
    df_all_cols = drake_df.copy()

    # Get the input song that we will make recommendations from
    input_song = playlist_df.iloc[input_song_index]

    #Remove the user's inputted track from original dataset so it isn't recommended later on
    drake_df =  drake_df[drake_df['track_uri'] != input_song['track_uri']]

    #Scale data
    user_features, dataset_features = scale_data(input_song, drake_df)

    # Recommending system
    return recommender(user_features, dataset_features, df_all_cols, 5)
    

In [225]:
# Function that runs the recommendation system using Euclidean Distance
def make_recs_euclidean(input_song_index, playlist_df, drake_df):
    #Keeps all columns so that we can extract the recommended song names and artists later
    df_all_cols = drake_df.copy()

    # Get the input song that we will make recommendations from
    input_song = playlist_df.iloc[input_song_index]

    #Remove the user's inputted track from original dataset so it isn't recommended later on
    drake_df =  drake_df[drake_df['track_uri'] != input_song['track_uri']]

    #Scale data
    user_features, dataset_features = scale_data(input_song, drake_df)

    # Recommending system
    return recommender_euclidean(user_features, dataset_features, df_all_cols, 5)
    

In [226]:
# Function that runs recommendation system using KMeans
def make_recs_kmeans(input_song_index, playlist_df, df):

    # Scale data
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(playlist_df[selected_features])

    # Perform KMeans
    num_clusters = 5
    kmeans = KMeans(n_clusters = num_clusters, random_state = 42, n_init = 10)
    kmeans.fit(scaled_features)

    # Identify the cluster to which the target song belongs
    target_song_features = scaled_features[input_song_index]  # Replace target_song_index with the index of your target song
    target_song_cluster = kmeans.predict([target_song_features])[0]
   
    # Find songs in the same cluster as the target song
    playlist_df['cluster'] = kmeans.labels_
    
    songs_in_same_cluster = playlist_df[playlist_df['cluster'] == target_song_cluster]

    # Now you can recommend songs from the same cluster
    recommended_songs = songs_in_same_cluster.sample(n=5, replace = True)
    return recommended_songs
    

In [227]:
make_recs_cosine(5, drake_sad, drake_df)

Unnamed: 0,track_uri,track_name,album_name,duration_ms,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
94,spotify:track:11L064movtyopGdLiX4sVg,Peak,Scorpion,206026,0.687,0.218,9,-13.539,0.0388,0.85,0.000125,0.106,0.269,91.991
40,spotify:track:1PDP7mLiAMwhfmgIwzhOm2,Yebba’s Heartbreak,Certified Lover Boy,133762,0.476,0.161,8,-11.665,0.0407,0.967,0.0381,0.109,0.0908,119.614
114,spotify:track:2fkeWbM6iqTw7oGHTYm2lw,4422,More Life,186293,0.609,0.229,11,-12.36,0.0333,0.558,0.0413,0.113,0.19,111.887
81,spotify:track:3e0ZGE7Gp034iLknjQk4QW,Can I,Care Package,189289,0.643,0.178,4,-8.56,0.0588,0.484,0.0108,0.122,0.0674,85.042
145,spotify:track:3ppVO2tyWRRznNmONvt7Se,Summers Over Interlude,Views,106333,0.699,0.255,4,-8.647,0.0303,0.405,0.00242,0.0985,0.242,132.031


In [228]:
from statistics import mean

In [229]:
def evaluate_recommendations(model, playlist_list):
    for i, playlist in enumerate(playlist_list):
        # Split the data into input songs, and songs that won't be seen
        input_group, unseen_group = train_test_split(playlist, test_size=0.3, random_state=42)

        # Initialize empty np arrays that will hold the cosine similarity scores, which will be averaged out later
        all_similarity_unseen = []
        all_similarity_recs = []

        # Loop through all songs in the input_group
        for input_song_index, input_song in input_group.iterrows():
            # Make recommendations with the model we are testing
            recs = model(input_song_index, playlist, drake_df)

            # Extract audio features from input song, unseen group, and recommended songs
            input_features = input_song[selected_features].values.reshape(1, -1)
            unseen_group_features = unseen_group[selected_features].values
            recs_features = recs[selected_features].values

            # Calculate cosine similarities between the input and unseen songs 
            similarities_unseen = cosine_similarity(input_features, unseen_group_features).flatten().tolist()
            all_similarity_unseen += similarities_unseen
            
            # Calculate cosine similarities between the input and the recommended songs
            similarities_recs = cosine_similarity(input_features, recs_features).flatten().tolist()
            all_similarity_recs += similarities_recs
            
        print(f"Playlist {i + 1}")
        print(f"Average Cosine Similarity between input and unseen songs {mean(all_similarity_unseen)}")
        print(f"Average Cosine Similarity between input and recommended songs {mean(all_similarity_recs)}")
        print()



In [230]:
evaluate_recommendations(make_recs_cosine, playlist_list)

Playlist 1
Average Cosine Similarity between input and unseen songs 0.996244244187846
Average Cosine Similarity between input and recommended songs 0.9986536613504886

Playlist 2
Average Cosine Similarity between input and unseen songs 0.99800853296789
Average Cosine Similarity between input and recommended songs 0.9989942539172595

Playlist 3
Average Cosine Similarity between input and unseen songs 0.9977441639644249
Average Cosine Similarity between input and recommended songs 0.998856750703676

Playlist 4
Average Cosine Similarity between input and unseen songs 0.9978213588636753
Average Cosine Similarity between input and recommended songs 0.9986885746103663

Playlist 5
Average Cosine Similarity between input and unseen songs 0.9980462797929787
Average Cosine Similarity between input and recommended songs 0.9988241912974556



In [231]:
evaluate_recommendations(make_recs_euclidean, playlist_list)

Playlist 1
Average Cosine Similarity between input and unseen songs 0.996244244187846
Average Cosine Similarity between input and recommended songs 0.9986046087791615

Playlist 2
Average Cosine Similarity between input and unseen songs 0.99800853296789
Average Cosine Similarity between input and recommended songs 0.9990324918504367

Playlist 3
Average Cosine Similarity between input and unseen songs 0.9977441639644249
Average Cosine Similarity between input and recommended songs 0.9988804239120851

Playlist 4
Average Cosine Similarity between input and unseen songs 0.9978213588636753
Average Cosine Similarity between input and recommended songs 0.998713889827439

Playlist 5
Average Cosine Similarity between input and unseen songs 0.9980462797929787
Average Cosine Similarity between input and recommended songs 0.9988496604159657



In [232]:
evaluate_recommendations(make_recs_kmeans, playlist_list)

Playlist 1
Average Cosine Similarity between input and unseen songs 0.996244244187846
Average Cosine Similarity between input and recommended songs 0.9981654845384642

Playlist 2
Average Cosine Similarity between input and unseen songs 0.99800853296789
Average Cosine Similarity between input and recommended songs 0.9985541505525928

Playlist 3
Average Cosine Similarity between input and unseen songs 0.9977441639644249
Average Cosine Similarity between input and recommended songs 0.9981440654121435

Playlist 4
Average Cosine Similarity between input and unseen songs 0.9978213588636753
Average Cosine Similarity between input and recommended songs 0.9984784362890367

Playlist 5
Average Cosine Similarity between input and unseen songs 0.9980462797929787
Average Cosine Similarity between input and recommended songs 0.9981486075863654

