In [1]:
import pandas as pd
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import preprocessing

df=pd.read_csv("../data/datasets/spotify.csv", sep=',')

df.head()
df.columns.to_list()

['title',
 'artist',
 'the genre of the track',
 'year',
 'Beats.Per.Minute -The tempo of the song',
 'Energy- The energy of a song - the higher the value, the more energtic',
 'Danceability - The higher the value, the easier it is to dance to this song',
 'Loudness/dB - The higher the value, the louder the song',
 'Liveness - The higher the value, the more likely the song is a live recording',
 'Valence - The higher the value, the more positive mood for the song',
 'Length - The duration of the song',
 'Acousticness - The higher the value the more acoustic the song is',
 'Speechiness - The higher the value the more spoken word the song contains',
 'Popularity- The higher the value the more popular the song is']

In [2]:


rename_map = {
  "title": "name",
  "the genre of the track" : "genre",
  "Popularity- The higher the value the more popular the song is" : "popularity",
  "Length - The duration of the song" : "duration",
  "Acousticness - The higher the value the more acoustic the song is" : "instrumental",
  "Valence - The higher the value, the more positive mood for the song" : "mood",
}

cols = list(rename_map.values())
df = df.rename(rename_map, axis='columns')[cols]
feature_cols = [
  "genre", 
  "popularity", "duration", "instrumental", "mood"
]

In [3]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
normalized_df =scaler.fit_transform(df[feature_cols])

print(normalized_df[:2])

[[0.83838384 0.2862069  0.19191919 0.81632653]
 [0.82828283 0.44482759 0.24242424 0.65306122]]


In [4]:

# Create a pandas series with song titles as indices and indices as series values 
indices = pd.Series(df.index, index=df['name']).drop_duplicates()

# Create cosine similarity matrix based on given matrix
cosine = cosine_similarity(normalized_df)

def generate_recommendation(song_title, model_type=cosine ):
    """
    Purpose: Function for song recommendations 
    Inputs: song title and type of similarity model
    Output: Pandas series of recommended songs
    """
    # Get song indices
    index=indices[song_title]
    # Get list of songs for given songs
    score=list(enumerate(model_type[indices[song_title]]))
    # Sort the most similar songs
    similarity_score = sorted(score,key = lambda x:x[1],reverse = True)
    # Select the top-10 recommend songs
    similarity_score = similarity_score[1:11]
    top_songs_index = [i[0] for i in similarity_score]
    # Top 10 recommende songs
    top_songs=df['name'].iloc[top_songs_index]
    return top_songs

In [5]:
song_title = "Bad Romance"

print("\nCosine Kernel - Recommended Songs:")
print(generate_recommendation(song_title, cosine).values)

sig_kernel = sigmoid_kernel(normalized_df)

print("\n\nSigmoid Kernel - Recommended Songs:")
print(generate_recommendation(song_title, sig_kernel).values)


Cosine Kernel - Recommended Songs:
['Higher' 'Water Under the Bridge' 'Judas' 'Legendary Lovers'
 'A Little Party Never Killed Nobody (All We Got)'
 'A Little Party Never Killed Nobody (All We Got)'
 'No Brainer (feat. Justin Bieber, Chance the Rapper & Quavo)'
 'Steal My Girl' 'Boom Boom' 'Love On Top']


Sigmoid Kernel - Recommended Songs:
['Shape of You' 'Happy - From "Despicable Me 2"'
 "There's Nothing Holdin' Me Back"
 "I'm the One (feat. Justin Bieber, Quavo, Chance the Rapper & Lil Wayne)"
 'Rude' 'Lose Yourself to Dance' 'Sugar' 'Shake It Off' 'Sucker'
 'Blurred Lines']


: 