In [32]:
import pandas as pd

In [33]:
data = pd.read_csv('songsDataset.csv',nrows=10000)

In [34]:
data.shape

(10000, 3)

In [35]:
data.head()

Unnamed: 0,'userID','songID','rating'
0,0,7171,5
1,0,8637,4
2,0,21966,4
3,0,35821,5
4,0,82446,5


In [36]:
data.describe()

Unnamed: 0,'userID','songID','rating'
count,10000.0,10000.0,10000.0
mean,499.5,67940.1716,3.469
std,288.689425,39267.696423,1.546182
min,0.0,4.0,1.0
25%,249.75,34282.0,2.0
50%,499.5,67264.5,4.0
75%,749.25,101778.5,5.0
max,999.0,136728.0,5.0


In [37]:
data.isnull().sum()

'userID'    0
'songID'    0
'rating'    0
dtype: int64

In [38]:
# data.dropna(inplace=True)

In [39]:
data.isnull().sum()

'userID'    0
'songID'    0
'rating'    0
dtype: int64

In [40]:
data.duplicated().sum()

0

Convert the dataset into a matrix where rows represent users, columns represent songs, and cells represent ratings

In [41]:
data.head()

Unnamed: 0,'userID','songID','rating'
0,0,7171,5
1,0,8637,4
2,0,21966,4
3,0,35821,5
4,0,82446,5


In [42]:
print(data.columns)


Index([''userID'', ''songID'', ''rating''], dtype='object')


In [43]:
# show rating column
data["'rating'"].head()

0    5
1    4
2    4
3    5
4    5
Name: 'rating', dtype: int64

In [44]:
item_matrix = data.pivot_table(index="'userID'", columns="'songID'", values="'rating'")

Fill missing values with zeros

In [45]:
item_matrix.isnull().sum()

'songID'
4         999
34        999
45        999
66        999
69        998
         ... 
136686    999
136714    999
136716    999
136719    999
136728    999
Length: 7389, dtype: int64

In [46]:
item_matrix.head()

'songID',4,34,45,66,69,98,107,115,191,208,...,136507,136571,136586,136592,136623,136686,136714,136716,136719,136728
'userID',Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,,,...,3.0,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,


In [47]:
item_matrix.fillna(0, inplace=True)

Compute the cosine similarity between items

In [48]:
from sklearn.metrics.pairwise import cosine_similarity

In [49]:
item_similarity = cosine_similarity(item_matrix.T)

In [50]:
# containing the similarity scores between items (songs).
item_similarity_df = pd.DataFrame(item_similarity, index=item_matrix.columns, columns=item_matrix.columns)

Create a function to get similar items

In [51]:
def get_similar_items(song_id, item_similarity_df, top_n=5):
    similar_scores = item_similarity_df[song_id].sort_values(ascending=False)
    similar_items = similar_scores.iloc[1:top_n+1].index
    return similar_items


In [52]:
def recommend_songs(user_id, item_matrix, item_similarity_df, top_n=5):

    user_ratings = item_matrix.loc[user_id]
    user_ratings = user_ratings[user_ratings > 0]

    recommendations = pd.Series(dtype=float)
    for song, rating in user_ratings.items():
        similar_items = get_similar_items(song, item_similarity_df, top_n)
        for similar_item in similar_items:
            if similar_item in recommendations:
                recommendations[similar_item] += rating
            else:
                recommendations[similar_item] = rating

    recommendations = recommendations.sort_values(ascending=False)
    return recommendations.head(top_n).index

In [53]:
user_id = int(input("Enter UserID : "))
recommended_songs = recommend_songs(user_id, item_matrix, item_similarity_df)
print("Recommended songs for user", user_id, ":", recommended_songs)

Enter UserID : 5
Recommended songs for user 5 : Index([11136, 127192, 94523, 63537, 126631], dtype='int64')


## Using sparse matrix

In [54]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity

In [55]:
# Load and prepare the data
data = pd.read_csv('songsDataset.csv',nrows=10000)

In [56]:
# Create a sparse matrix from the data
sparse_item_matrix = csr_matrix((data["'rating'"].values, (data["'userID'"].values, data["'songID'"].values)))

In [57]:
# Compute the cosine similarity between items
item_similarity = cosine_similarity(sparse_item_matrix.T)

In [58]:
# Create a function to get similar items
def get_similar_items(song_id, item_similarity, top_n=5):
    similar_scores = item_similarity[song_id].flatten()
    top_n_indices = similar_scores.argsort()[:-top_n-1:-1][1:]
    return top_n_indices

In [59]:
# Create a function to recommend songs
def recommend_songs(user_id, sparse_item_matrix, item_similarity, top_n=5):
    user_ratings = sparse_item_matrix[user_id].toarray().flatten()
    user_ratings = [rating for rating in user_ratings if rating > 0]
    song_ids = [i for i, rating in enumerate(user_ratings) if rating > 0]

    recommendations = {}
    for song_id in song_ids:
        similar_items = get_similar_items(song_id, item_similarity, top_n)
        for similar_item in similar_items:
            if similar_item in recommendations:
                recommendations[similar_item] += user_ratings[song_id]
            else:
                recommendations[similar_item] = user_ratings[song_id]

    recommendations = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
    return [song_id for song_id, _ in recommendations[:top_n]]

In [60]:
# Get user ID from input
user_id = int(input("Enter UserID : "))

# Make recommendations
recommended_songs = recommend_songs(user_id, sparse_item_matrix, item_similarity)
print("Recommended songs for user", user_id, ":", recommended_songs)

Enter UserID : 5
Recommended songs for user 5 : [45580, 45566, 45567, 45568, 128509]
