In [5]:
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the song data from the CSV file.
initial_dataset = pd.read_csv('spotify_millionsongdata.csv')

# Work on a random sample of at most 6000 songs. Capping the size at the
# number of available rows keeps `sample` from raising ValueError when the
# file holds fewer than 6000 songs.
# NOTE: no random_state is passed, so every run draws a different sample.
sample_size = min(6000, len(initial_dataset))
dataset = initial_dataset.sample(n=sample_size).drop('link', axis=1).reset_index(drop=True)

# Replace line breaks with a space (not an empty string) so the last word of
# one lyric line is never fused with the first word of the next line into a
# single bogus token. regex=False makes the literal match explicit.
dataset['text'] = dataset['text'].str.replace('\r\n', ' ', regex=False)

# Use TF-IDF to represent the lyrics quantitatively (English stop words removed).
tf_idf_scores = TfidfVectorizer(analyzer='word', stop_words='english')
tf_idf_matrix = tf_idf_scores.fit_transform(dataset['text'])

# Use cosine similarity to score every pair of songs.
cosinesimilarities = cosine_similarity(tf_idf_matrix)

# Pull the columns out once as arrays: positional array indexing is much
# cheaper than a pandas label lookup repeated for every neighbour.
song_titles = dataset['song'].to_numpy()
song_artists = dataset['artist'].to_numpy()

# The 49 highest-scoring candidates are inspected per song and at most 48 of
# them are kept as neighbours once the song itself has been removed.
candidate_count = 49

# Maps a song title to its neighbours as (score, song, artist) tuples, ordered
# by descending similarity.
# NOTE: songs that share a title overwrite each other here; only the entry
# built last for a given title is kept.
similarities = {}
for i, row in enumerate(cosinesimilarities):
    candidates = row.argsort()[::-1][:candidate_count]
    # Exclude the song itself by index rather than by dropping the first
    # candidate: with tied scores (duplicate lyrics, or an all-zero vector)
    # the song is not guaranteed to be ranked first.
    neighbours = [x for x in candidates if x != i][:candidate_count - 1]
    similarities[song_titles[i]] = [(row[x], song_titles[x], song_artists[x]) for x in neighbours]


class RecommendationByContent:
    """Look up and print precomputed content-based song recommendations."""

    def __init__(self, matrix):
        """Keep a reference to the similarity lookup table.

        Args:
            matrix: Mapping from a song key to a sliceable sequence of
                entries, where each entry is read as ``[0]`` similarity
                score, ``[1]`` song title and ``[2]`` artist.
        """
        self.matrix_similar = matrix

    def print_recommendation(self, song, recommendation_song):
        """Print a header followed by one numbered entry per recommendation.

        Args:
            song: Value shown in the header as the song being matched.
            recommendation_song: Sequence of entries indexed as
                ``[0]`` score, ``[1]`` title, ``[2]`` artist.
        """
        total = len(recommendation_song)

        # The embedded newlines produce the blank line above and below the header.
        print(f'\nThe {total} recommended songs for {song} are:\n')
        for rank, entry in enumerate(recommendation_song, start=1):
            print(f"Recommended song {rank}:")
            print(f"{entry[1]} by {entry[2]} with a similarity score of {round(entry[0], 4)}\n")

    def recommend_song(self, recomm):
        """Print the leading recommendations for the requested song.

        Args:
            recomm: Mapping with the keys ``'song_for_recommendation'`` (key
                into the lookup table) and ``'number_of_recommendations'``
                (upper slice bound applied to that song's entries).

        Raises:
            KeyError: If either key is missing from ``recomm`` or the song is
                not present in the lookup table.
        """
        title = recomm['song_for_recommendation']
        limit = recomm['number_of_recommendations']
        top_matches = self.matrix_similar[title][:limit]
        self.print_recommendation(song=title, recommendation_song=top_matches)

# Wrap the precomputed similarity table in the recommender.
recommended_songs = RecommendationByContent(similarities)

# Request five recommendations for the first song of the sampled dataset.
recomm_song = dict(
    song_for_recommendation=dataset['song'].iloc[0],
    number_of_recommendations=5,
)

# Look the song up and print its recommendations.
recommended_songs.recommend_song(recomm_song)


The 5 recommended songs for If I Could Change Your Mind are:

Recommended song 1:
Don't Ever Change by Kinks with a similarity score of 0.3998

Recommended song 2:
You'll Always Find Your Way Back Home by Miley Cyrus with a similarity score of 0.3391

Recommended song 3:
Change Of Heart by Diana Ross with a similarity score of 0.3252

Recommended song 4:
Things Change by Tim McGraw with a similarity score of 0.2965

Recommended song 5:
I Wouldn't Change The Man He Is by Diana Ross with a similarity score of 0.2411

