In [15]:
# Import libraries
import random
import pandas as pd
from sklearn.model_selection import train_test_split
from recommender import recommender
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import creds  # Import
import requests
from euclidean_recommender import recommender_euclidean
from kmeans_recommender import main
from sklearn.cluster import KMeans

In [16]:
# Compute the cosine similarity between the input song, and the recommended songs

# Compute the cosine similarity between the input song, and songs in the playlist that the recommendation system didn't see

# Calculate the average cosine similarity between the input song and other songs in the playlist that the recommendation system didn't see

# Compare the average cosine similarties

Make sure each recommendation system can generate recs

In [19]:
# Import each playlist representing a specific mood
drake_sad = pd.read_csv('playlist_1.csv')
drake_hype = pd.read_csv('playlist_2.csv')
drake_chill = pd.read_csv('playlist_3.csv')
drake_romantic = pd.read_csv('playlist_4.csv')
drake_party = pd.read_csv('playlist_5.csv')

In [20]:
playlist_list = [drake_sad, drake_hype, drake_chill, drake_romantic, drake_party]

In [27]:
drake_sad[drake_sad['artist'].str.contains('Drake')].shape[0]

36

In [38]:
for playlist in playlist_list:
    print(f"Rows that contain Drake songs: {playlist[playlist['artist'].str.contains('Drake')].shape[0]}")
    print(f"Total rows: {playlist.shape[0]}")


Rows that contain Drake songs: 36
Total rows: 36
Rows that contain Drake songs: 120
Total rows: 120
Rows that contain Drake songs: 220
Total rows: 220
Rows that contain Drake songs: 279
Total rows: 279
Rows that contain Drake songs: 348
Total rows: 348


Rows that contain Drake songs: 36
Total rows: 36


In [5]:
#Read in dataset containing all drake songs
drake_df = pd.read_csv('drake_songs_dataset.csv')

#Get desired audio features
selected_features = [
    'danceability', 'energy', 'key', 'loudness',
    'speechiness', 'acousticness', 'instrumentalness', 
    'liveness', 'valence', 'tempo'
]

In [6]:
#Function that scales all data before computing cosine similarity matrix
def scale_data(input_song, drake_df):
    # Making a copy to not alter drake_df
    recommender_dataset = drake_df.copy()
    
    # Removing input song from recommender_dataset so it isn't recommended
    recommender_dataset = recommender_dataset[recommender_dataset['track_uri'] != input_song['track_uri']]

    #Getting only necessary columns before concat
    recommender_dataset = recommender_dataset[selected_features].copy()
    input_song = input_song[selected_features].copy().to_frame().T
    
    #Combining rows for features scaling
    all_features = pd.concat([input_song, recommender_dataset])
    scaler = StandardScaler()
    all_features_scaled = scaler.fit_transform(all_features)

    user_features = all_features_scaled[:1, :].copy()
    dataset_features = all_features_scaled[1:, :].copy()
    
    return user_features, dataset_features
    
        

In [7]:
# Function that runs the recommendation system using cosine similarity
def make_recs_cosine(input_song_index, playlist_df, drake_df):
    #Keeps all columns so that we can extract the recommended song names and artists later
    df_all_cols = drake_df.copy()

    # Get the input song that we will make recommendations from
    input_song = playlist_df.iloc[input_song_index]

    #Remove the user's inputted track from original dataset so it isn't recommended later on
    drake_df =  drake_df[drake_df['track_uri'] != input_song['track_uri']]

    #Scale data
    user_features, dataset_features = scale_data(input_song, drake_df)

    # Recommending system
    return recommender(user_features, dataset_features, df_all_cols, 5)
    

In [8]:
# Function that runs the recommendation system using Euclidean Distance
def make_recs_euclidean(input_song_index, playlist_df, drake_df):
    #Keeps all columns so that we can extract the recommended song names and artists later
    df_all_cols = drake_df.copy()

    # Get the input song that we will make recommendations from
    input_song = playlist_df.iloc[input_song_index]

    #Remove the user's inputted track from original dataset so it isn't recommended later on
    drake_df =  drake_df[drake_df['track_uri'] != input_song['track_uri']]

    #Scale data
    user_features, dataset_features = scale_data(input_song, drake_df)

    # Recommending system
    return recommender_euclidean(user_features, dataset_features, df_all_cols, 5)
    

In [9]:
# Function that runs recommendation system using KMeans
def make_recs_kmeans(input_song_index, playlist_df, df):

    # Scale data
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(df[selected_features])

    # Perform KMeans
    num_clusters = 5
    kmeans = KMeans(n_clusters = num_clusters, random_state = 42, n_init = 10)
    kmeans.fit(scaled_features)

    # Identify the cluster to which the target song belongs
    target_song_features = scaled_features[input_song_index]  # Replace target_song_index with the index of your target song
    target_song_cluster = kmeans.predict([target_song_features])[0]
   
    # Find songs in the same cluster as the target song
    playlist_df['cluster'] = kmeans.labels_
    
    songs_in_same_cluster = playlist_df[playlist_df['cluster'] == target_song_cluster]

    # Now you can recommend songs from the same cluster
    recommended_songs = songs_in_same_cluster.sample(n=5, replace = True)
    recommended_songs = recommended_songs.drop_duplicates()
    return recommended_songs
    

In [10]:
from statistics import mean
from numpy import mean

In [11]:
def evaluate_recommendations(model, playlist_list, is_kmeans):
    all_similarity_unseen_total = []  # Accumulator for all playlists
    all_similarity_recs_total = []    # Accumulator for all playlists
    
    for i, playlist in enumerate(playlist_list):
        # Split the data into input songs and songs that won't be seen
        input_group, unseen_group = train_test_split(playlist, test_size=0.3, random_state=42)

        # Initialize empty np arrays that will hold the cosine similarity scores
        all_similarity_unseen = []
        all_similarity_recs = []

        # Loop through all songs in the input_group
        for input_song_index, input_song in input_group.iterrows():
            # Make recommendations with the model we are testing
            if is_kmeans:
                recs = model(input_song_index, drake_df, drake_df)
            else:
                recs = model(input_song_index, playlist, drake_df)

            # Extract audio features from input song, unseen group, and recommended songs
            input_features = input_song[selected_features].values.reshape(1, -1)
            unseen_group_features = unseen_group[selected_features].values
            recs_features = recs[selected_features].values

            # Calculate cosine similarities
            similarities_unseen = cosine_similarity(input_features, unseen_group_features).flatten().tolist()
            all_similarity_unseen += similarities_unseen
            
            similarities_recs = cosine_similarity(input_features, recs_features).flatten().tolist()
            all_similarity_recs += similarities_recs

        # Calculate the average cosine similarity for the current playlist
        avg_similarity_unseen = mean(all_similarity_unseen)
        avg_similarity_recs = mean(all_similarity_recs)
        
        # Accumulate the results for all playlists
        all_similarity_unseen_total.append(avg_similarity_unseen)
        all_similarity_recs_total.append(avg_similarity_recs)

        # Print the results for the current playlist
        print(f"Playlist {i + 1}")
        print(f"Average Cosine Similarity between input and unseen songs: {avg_similarity_unseen}")
        print(f"Average Cosine Similarity between input and recommended songs: {avg_similarity_recs}")
        print()

    # Calculate the overall average cosine similarity across all playlists
    overall_avg_similarity_unseen = mean(all_similarity_unseen_total)
    overall_avg_similarity_recs = mean(all_similarity_recs_total)
    
    print("Overall Results")
    print(f"Overall Average Cosine Similarity between input and unseen songs: {overall_avg_similarity_unseen}")
    print(f"Overall Average Cosine Similarity between input and recommended songs: {overall_avg_similarity_recs}")


In [12]:
evaluate_recommendations(make_recs_cosine, playlist_list, False)

Playlist 1
Average Cosine Similarity between input and unseen songs: 0.996459396774986
Average Cosine Similarity between input and recommended songs: 0.9986536613504886



Playlist 2
Average Cosine Similarity between input and unseen songs: 0.9976324246799106
Average Cosine Similarity between input and recommended songs: 0.9989942539172595

Playlist 3
Average Cosine Similarity between input and unseen songs: 0.9973879816082651
Average Cosine Similarity between input and recommended songs: 0.9988567507036757

Playlist 4
Average Cosine Similarity between input and unseen songs: 0.99738304909688
Average Cosine Similarity between input and recommended songs: 0.9986885746103663

Playlist 5
Average Cosine Similarity between input and unseen songs: 0.9974868629854471
Average Cosine Similarity between input and recommended songs: 0.9988241912974557

Overall Results
Overall Average Cosine Similarity between input and unseen songs: 0.9972699430290979
Overall Average Cosine Similarity between input and recommended songs: 0.9988034863758491


In [13]:
evaluate_recommendations(make_recs_euclidean, playlist_list, False)

Playlist 1
Average Cosine Similarity between input and unseen songs: 0.996459396774986
Average Cosine Similarity between input and recommended songs: 0.9986046087791616

Playlist 2
Average Cosine Similarity between input and unseen songs: 0.9976324246799106
Average Cosine Similarity between input and recommended songs: 0.9990324918504367

Playlist 3
Average Cosine Similarity between input and unseen songs: 0.9973879816082651
Average Cosine Similarity between input and recommended songs: 0.998880423912085

Playlist 4
Average Cosine Similarity between input and unseen songs: 0.99738304909688
Average Cosine Similarity between input and recommended songs: 0.998713889827439

Playlist 5
Average Cosine Similarity between input and unseen songs: 0.9974868629854471
Average Cosine Similarity between input and recommended songs: 0.9988496604159658

Overall Results
Overall Average Cosine Similarity between input and unseen songs: 0.9972699430290979
Overall Average Cosine Similarity between input a

In [14]:
evaluate_recommendations(make_recs_kmeans, playlist_list, True)

Playlist 1
Average Cosine Similarity between input and unseen songs: 0.996459396774986
Average Cosine Similarity between input and recommended songs: 0.9974527752341593

Playlist 2
Average Cosine Similarity between input and unseen songs: 0.9976324246799106
Average Cosine Similarity between input and recommended songs: 0.9977138245181268

Playlist 3
Average Cosine Similarity between input and unseen songs: 0.9973879816082651
Average Cosine Similarity between input and recommended songs: 0.9976694866697086

Playlist 4
Average Cosine Similarity between input and unseen songs: 0.99738304909688
Average Cosine Similarity between input and recommended songs: 0.9974997429117303



IndexError: index 340 is out of bounds for axis 0 with size 295