#### Load libraries

In [1]:
import pandas as pd
import numpy as np 
from sklearn.metrics.pairwise import cosine_similarity

from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import random

#### Import data

In [2]:
# Load liked songs from API extraction
# (most recent 100 user liked songs)
liked_songs = pd.read_csv('Project Data/liked_songs.csv')
(liked_songs.head())

Unnamed: 0,id,name,artists,album,release_date,popularity,danceability,energy,key,loudness,...,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature
0,2NFadq6pUeiVEihLvUlOSr,"Rauw Alejandro: Bzrp Music Sessions, Vol. 56","Bizarrap, Rauw Alejandro","Rauw Alejandro: Bzrp Music Sessions, Vol. 56",2023-06-21,65,0.774,0.572,11,-6.0,...,0.00326,0.315,0.489,128.01,audio_features,spotify:track:2NFadq6pUeiVEihLvUlOSr,https://api.spotify.com/v1/tracks/2NFadq6pUeiV...,https://api.spotify.com/v1/audio-analysis/2NFa...,191254,4
1,3A1BJKGMsa8JqO8M8zztyq,Me Gustas Tu,Manu Chao,Me Gustas Tu EP,2023-02-13,59,0.657,0.742,11,-6.822,...,0.493,0.222,0.909,175.943,audio_features,spotify:track:3A1BJKGMsa8JqO8M8zztyq,https://api.spotify.com/v1/tracks/3A1BJKGMsa8J...,https://api.spotify.com/v1/audio-analysis/3A1B...,239987,4
2,2lwwrWVKdf3LR9lbbhnr6R,Float On,Modest Mouse,Good News For People Who Love Bad News,2004-04-05,71,0.649,0.888,6,-4.807,...,2e-06,0.0888,0.553,100.975,audio_features,spotify:track:2lwwrWVKdf3LR9lbbhnr6R,https://api.spotify.com/v1/tracks/2lwwrWVKdf3L...,https://api.spotify.com/v1/audio-analysis/2lww...,208467,4
3,77RNVzAVwqWiZrZuWqMV2i,Nancy From Now On,Father John Misty,Fear Fun,2012-05-01,61,0.535,0.646,0,-9.168,...,0.0406,0.0908,0.501,107.078,audio_features,spotify:track:77RNVzAVwqWiZrZuWqMV2i,https://api.spotify.com/v1/tracks/77RNVzAVwqWi...,https://api.spotify.com/v1/audio-analysis/77RN...,234160,4
4,6uW4QhvYTl6q24OBbWTt9s,Convite,Lamparina,Manda Dizer,2018-08-06,44,0.47,0.621,11,-7.369,...,0.0,0.127,0.618,117.952,audio_features,spotify:track:6uW4QhvYTl6q24OBbWTt9s,https://api.spotify.com/v1/tracks/6uW4QhvYTl6q...,https://api.spotify.com/v1/audio-analysis/6uW4...,213384,4


In [3]:
# Take average of each of the audio features to get user vector
popularity_avg = liked_songs['popularity'].mean()
danceability_avg = liked_songs['danceability'].mean()
energy_avg = liked_songs['energy'].mean()
key_avg = liked_songs['key'].mean()
loudness_avg = liked_songs['loudness'].mean()
mode_avg = liked_songs['mode'].mean()
speechiness_avg = liked_songs['speechiness'].mean()
acousticness_avg = liked_songs['acousticness'].mean()
instrumentalness_avg = liked_songs['instrumentalness'].mean()
liveness_avg = liked_songs['liveness'].mean()
valence_avg = liked_songs['valence'].mean()
tempo_avg = liked_songs['tempo'].mean()
duration_avg = liked_songs['duration_ms'].mean()
time_avg = liked_songs['time_signature'].mean()

In [4]:
user_vector = [popularity_avg,danceability_avg,energy_avg,key_avg,loudness_avg,mode_avg,speechiness_avg,acousticness_avg,
                instrumentalness_avg,liveness_avg,valence_avg,tempo_avg,duration_avg,time_avg]
user_vector

[58.63,
 0.59393,
 0.58185,
 5.21,
 -8.726320000000001,
 0.71,
 0.05485800000000001,
 0.36146888,
 0.09866979729999999,
 0.150994,
 0.50223,
 119.84297999999997,
 216932.31,
 3.9]

In [5]:
# Load Christmas playlist data from Spotify API extraction
xmas_data = pd.read_csv('Project Data/xmas_data.csv')
(xmas_data.head())

Unnamed: 0,id,name,artists,album,release_date,popularity,danceability,energy,key,loudness,...,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature
0,2EjXfH91m7f8HiJN1yQg97,Rockin' Around The Christmas Tree,Brenda Lee,Merry Christmas From Brenda Lee,1964-10-19,83,0.598,0.47,8,-8.744,...,0.0,0.505,0.879,67.086,audio_features,spotify:track:2EjXfH91m7f8HiJN1yQg97,https://api.spotify.com/v1/tracks/2EjXfH91m7f8...,https://api.spotify.com/v1/audio-analysis/2EjX...,126267,4
1,7vQbuQcyTflfCIOu3Uzzya,Jingle Bell Rock,Bobby Helms,Jingle Bell Rock/Captain Santa Claus (And His ...,1957-12-02,80,0.754,0.424,2,-8.463,...,0.0,0.0652,0.806,119.705,audio_features,spotify:track:7vQbuQcyTflfCIOu3Uzzya,https://api.spotify.com/v1/tracks/7vQbuQcyTflf...,https://api.spotify.com/v1/audio-analysis/7vQb...,130973,4
2,5hslUAKq9I9CG2bAulFkHN,It's the Most Wonderful Time of the Year,Andy Williams,The Andy Williams Christmas Album,1963-11-24,80,0.24,0.598,7,-8.435,...,0.0,0.117,0.776,201.629,audio_features,spotify:track:5hslUAKq9I9CG2bAulFkHN,https://api.spotify.com/v1/tracks/5hslUAKq9I9C...,https://api.spotify.com/v1/audio-analysis/5hsl...,151933,3
3,46pF1zFimM582ss1PrMy68,Christmas (Baby Please Come Home),Darlene Love,A Christmas Gift For You From Phil Spector,1963,66,0.325,0.759,8,-7.146,...,0.000362,0.0773,0.36,126.627,audio_features,spotify:track:46pF1zFimM582ss1PrMy68,https://api.spotify.com/v1/tracks/46pF1zFimM58...,https://api.spotify.com/v1/audio-analysis/46pF...,166453,4
4,4PS1e8f2LvuTFgUs1Cn3ON,The Christmas Song (Merry Christmas To You),Nat King Cole,The Christmas Song (Expanded Edition),1962,75,0.32,0.21,6,-15.231,...,8e-05,0.138,0.208,73.196,audio_features,spotify:track:4PS1e8f2LvuTFgUs1Cn3ON,https://api.spotify.com/v1/tracks/4PS1e8f2LvuT...,https://api.spotify.com/v1/audio-analysis/4PS1...,192160,5


In [6]:
xmas_matrix = xmas_data.drop(['artists', 'name','album','release_date','type','uri','track_href','analysis_url'], axis=1)
# Number of rows
print(xmas_matrix.shape)
xmas_matrix.head()

(1960, 15)


Unnamed: 0,id,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,2EjXfH91m7f8HiJN1yQg97,83,0.598,0.47,8,-8.744,1,0.0496,0.617,0.0,0.505,0.879,67.086,126267,4
1,7vQbuQcyTflfCIOu3Uzzya,80,0.754,0.424,2,-8.463,1,0.0363,0.643,0.0,0.0652,0.806,119.705,130973,4
2,5hslUAKq9I9CG2bAulFkHN,80,0.24,0.598,7,-8.435,1,0.0369,0.766,0.0,0.117,0.776,201.629,151933,3
3,46pF1zFimM582ss1PrMy68,66,0.325,0.759,8,-7.146,1,0.053,0.395,0.000362,0.0773,0.36,126.627,166453,4
4,4PS1e8f2LvuTFgUs1Cn3ON,75,0.32,0.21,6,-15.231,1,0.0345,0.92,8e-05,0.138,0.208,73.196,192160,5


In [7]:
# Take the id variable out of the data frame so we have a matrix of all audio features
xmas_matrix.set_index('id', inplace=True)
xmas_matrix.head()

Unnamed: 0_level_0,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2EjXfH91m7f8HiJN1yQg97,83,0.598,0.47,8,-8.744,1,0.0496,0.617,0.0,0.505,0.879,67.086,126267,4
7vQbuQcyTflfCIOu3Uzzya,80,0.754,0.424,2,-8.463,1,0.0363,0.643,0.0,0.0652,0.806,119.705,130973,4
5hslUAKq9I9CG2bAulFkHN,80,0.24,0.598,7,-8.435,1,0.0369,0.766,0.0,0.117,0.776,201.629,151933,3
46pF1zFimM582ss1PrMy68,66,0.325,0.759,8,-7.146,1,0.053,0.395,0.000362,0.0773,0.36,126.627,166453,4
4PS1e8f2LvuTFgUs1Cn3ON,75,0.32,0.21,6,-15.231,1,0.0345,0.92,8e-05,0.138,0.208,73.196,192160,5


In [8]:
# CSR Matrix to save memory - compress matrix
data_matrix = csr_matrix(xmas_matrix.values)

In [9]:
# KNN model using cosine similarity
model_knn = NearestNeighbors(metric = "cosine", algorithm = "brute")
model_knn.fit(data_matrix)

#### Recommend songs based on an input song by user:

In [10]:
id='7xapw9Oy21WpfEcib2ErSA' # Mistletoe
distance, indice = model_knn.kneighbors([xmas_matrix.loc[id].values], n_neighbors=6)

print(distance)
print(indice)

[[0.00000000e+00 7.48548556e-10 7.93865418e-10 1.01252873e-09
  1.22095511e-09 1.25255384e-09]]
[[109 139 115  41  96  75]]


In [11]:
# prints 
rec_list = xmas_matrix.iloc[indice[0]].index.values.tolist()
xmas_data[xmas_data['id'].isin(rec_list)][['name','artists']]

Unnamed: 0,name,artists
41,Run Rudolph Run,Chuck Berry
75,Run Rudolph Run - Spotify Singles Holiday,Mark Ambor
96,Let It Snow! Let It Snow! Let It Snow! (with T...,"Frank Sinatra, B. Swanson Quartet"
109,Mistletoe,Justin Bieber
115,A Nonsense Christmas,Sabrina Carpenter
139,A Nonsense Christmas,Sabrina Carpenter


#### Recommendations based on liked songs data:

In [12]:
vector = np.array(user_vector)

In [13]:
import os
default_n_threads = 1
os.environ['OPENBLAS_NUM_THREADS'] = f"{default_n_threads}"
os.environ['MKL_NUM_THREADS'] = f"{default_n_threads}"
os.environ['OMP_NUM_THREADS'] = f"{default_n_threads}"
import sys


In [14]:
# Reshape liked_features into a 2D array (to match the shape of the playlist data)
liked_features = vector.reshape(1, -1)

# Calculate cosine similarity between the liked_features and each song in the playlist
similarities = cosine_similarity(liked_features, xmas_matrix)

# Get the indices of the most similar songs (top N recommendations)
N = 6  # Number of songs to recommend
top_indices = similarities[0].argsort()[-N:][::-1]  # Sort indices by similarity, descending order

# Get the top N recommended songs
recommended_songs = xmas_matrix.iloc[top_indices]

print("Recommended Songs:")
print(recommended_songs)

Recommended Songs:
                        popularity  danceability  energy  key  loudness  mode  \
id                                                                              
0UOG0zUn7t8m8QcxfzR7AH          57         0.531   0.727    2    -9.268     1   
4fzyvSu73BhGvi96p2zwjL          68         0.315   0.418    7    -9.146     1   
0jXjTHqZVXhQSGUAvUbCvU          61         0.310   0.302    3   -14.485     1   
04DYwFIKeq2Bkn9aqSI9PC          53         0.270   0.254    4   -12.639     1   
2xXCYeiWBTy1h829foY6i1          42         0.614   0.782    1    -4.925     1   
7us9KL6g1bLBAoUSVu7Qzb          43         0.578   0.608    6    -6.961     1   

                        speechiness  acousticness  instrumentalness  liveness  \
id                                                                              
0UOG0zUn7t8m8QcxfzR7AH       0.0340         0.208          0.934000    0.0912   
4fzyvSu73BhGvi96p2zwjL       0.0299         0.131          0.000679    0.0836   
0jXjTHqZ

In [15]:
rec_ids = recommended_songs.index.tolist()

In [16]:
xmas_data[xmas_data['id'].isin(rec_ids)][['name','artists']]

Unnamed: 0,name,artists
55,Adeste Fideles,Bing Crosby
97,When a Child Is Born,Johnny Mathis
132,Christmas Lights,Coldplay
289,Lit This Year,Florida Georgia Line
290,Have Yourself A Merry Little Christmas,Kacey Musgraves
396,"Merry Christmas, Please Don't Call",Bleachers
