In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.sparse as sp
from sklearn.neighbors import NearestNeighbors

In [15]:
df_music = pd.read_csv('Song_data.csv')
df_music.tail()



Unnamed: 0,artist,song,link,text,user_id,frequency,song_id
40127,Glen Campbell,Tomorrow Never Comes,/g/glen+campbell/tomorrow+never+comes_20321520...,Oh you tell me that you love me \nYes you tel...,6REhHrdY,2.0,D3SKCEF4
40128,Kirsty Maccoll,Tomorrow Never Comes,/k/kirsty+maccoll/tomorrow+never+comes_2007962...,I watch you lie asleep \nWatch you breathing ...,EUvdRKtX,2.0,D3SKCEF4
40129,Glen Campbell,Truck Driving Man,/g/glen+campbell/truck+driving+man_20822122.html,"Well, I stopped at a roadhouse in Texas \nWel...",ew8t6zBy,1.0,YQEDR9C5
40130,Glen Campbell,Try A Little Kindness,/g/glen+campbell/try+a+little+kindness_2032903...,If you see your brother standing by the road ...,skR5hL31,1.0,MUB3FMWK
40131,Glen Campbell,"Turn Around, Look At Me",/g/glen+campbell/turn+around+look+at+me_208452...,"There is someone walking behind you, \nTurn a...",NHXnVXhT,1.0,HICZYJH5


In [4]:
user_ids = df_music['user_id'].astype('category').cat.codes
song_ids = df_music['song_id'].astype('category').cat.codes

In [5]:
user_song_matrix = df_music.pivot_table(index='user_id', columns='song_id', values='frequency', fill_value=0)

In [7]:
average_listens = np.mean(user_song_matrix.values)
median_listens = np.median(user_song_matrix.values)
min_listens = np.min(user_song_matrix.values)
max_listens = np.max(user_song_matrix.values)

print(f"Average number of songs listened to per user: {average_listens}")
print(f"Median number of songs listened to per user: {median_listens}")
print(f"Min number of songs listened to: {min_listens}, Max number of songs listened to: {max_listens}")


Average number of songs listened to per user: 0.0004228775751974263
Median number of songs listened to per user: 0.0
Min number of songs listened to: 0, Max number of songs listened to: 35


In [9]:
unique_songs_count = df_music['song_id'].nunique()
unique_users_count = df_music['user_id'].nunique()


In [10]:
total_possible_entries = unique_users_count * unique_songs_count
non_zero_entries = df_music.shape[0]
zero_entries = total_possible_entries - non_zero_entries
sparsity = (zero_entries / total_possible_entries) * 100
print(f"The user-song matrix has {zero_entries} zero values, leading to a sparsity of {sparsity:.2f}%.")


The user-song matrix has 258765268 zero values, leading to a sparsity of 99.98%.


In [11]:
class MusicRecommender:
    def __init__(self, metric='cosine', algorithm='brute', k=20, data=None, decode_song_id=None):
        self.metric = metric
        self.algorithm = algorithm
        self.k = k
        self.data = data
        self.decode_song_id = decode_song_id
        self.model = NearestNeighbors(metric=self.metric, algorithm=self.algorithm, n_neighbors=self.k, n_jobs=-1)
        self.model.fit(data)

    def recommend(self, target_song, n_recommendations=10):
        # Find the index of the song in the matrix
        song_index = self.decode_song_id[target_song]
        distances, indices = self.model.kneighbors(self.data[song_index].reshape(1, -1), n_neighbors=n_recommendations + 1)
        
        # Get recommendations
        recommendations = []
        for i in range(1, len(distances.flatten())):
            recommendations.append(list(self.decode_song_id.keys())[list(self.decode_song_id.values()).index(indices.flatten()[i])])
        
        return recommendations

In [12]:
decode_song_id = {song: idx for idx, song in enumerate(list(df_music['song'].unique()))}

In [13]:
music_recommender = MusicRecommender(metric='cosine', algorithm='brute', k=20, data=user_song_matrix.values, decode_song_id=decode_song_id)


In [14]:
target_song_title = 'As Good As New' 
recommended_songs = music_recommender.recommend(target_song=target_song_title, n_recommendations=10)

print(f"Recommended songs for '{target_song_title}':\n{recommended_songs}")

Recommended songs for 'As Good As New':
['We Came Up', 'Ball And Chain For Sale', 'Pretty Paper', "Miss Judy's Farm", "You'll Never See Me Again", 'Heartsong', 'I Need You For Someone', 'Dreamtaker', 'Like It Or Not', 'Papa He Said']


In [16]:

target_song_title = 'Bang' 
recommended_songs = music_recommender.recommend(target_song=target_song_title, n_recommendations=10)

print(f"Recommended songs for '{target_song_title}':\n{recommended_songs}")

Recommended songs for 'Bang':
['Cygnet Committee', 'Raindrops Keep Falling On My Head', 'Taking It All Too Hard', 'The Guaranteed Eternal Sanctuary Man', "It's Yourself", 'Let Us Now Make Love', 'Like It Or Not', 'Man On The Corner', 'Papa He Said', 'In The Beginning']
