# Hybrid Model

In [22]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [23]:
# Load data
data = pd.read_csv('songsDataset.csv', nrows=10000)

In [43]:
# Preprocess data
data.columns = data.columns.str.strip().str.replace("'", "")


In [44]:
data.head()

Unnamed: 0,userID,songID,rating
0,0,7171,5
1,0,8637,4
2,0,21966,4
3,0,35821,5
4,0,82446,5


In [45]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   userID  10000 non-null  int64
 1   songID  10000 non-null  int64
 2   rating  10000 non-null  int64
dtypes: int64(3)
memory usage: 234.5 KB


In [46]:
data.isna().sum()

userID    0
songID    0
rating    0
dtype: int64

In [47]:
data.describe()

Unnamed: 0,userID,songID,rating
count,10000.0,10000.0,10000.0
mean,499.5,67940.1716,3.469
std,288.689425,39267.696423,1.546182
min,0.0,4.0,1.0
25%,249.75,34282.0,2.0
50%,499.5,67264.5,4.0
75%,749.25,101778.5,5.0
max,999.0,136728.0,5.0


In [34]:
# Create user-item matrix
user_item_matrix = data.pivot_table(index='userID', columns='songID', values='rating').fillna(0)
user_item_matrix.head()

songID,4,34,45,66,69,98,107,115,191,208,...,136507,136571,136586,136592,136623,136686,136714,136716,136719,136728
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
# Compute user similarities
user_similarity_matrix = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity_matrix, index=user_item_matrix.index, columns=user_item_matrix.index)


In [36]:
# Compute item similarities
item_similarity_matrix = cosine_similarity(user_item_matrix.T)
item_similarity_df = pd.DataFrame(item_similarity_matrix, index=user_item_matrix.columns, columns=user_item_matrix.columns)


In [37]:
# Function to get similar users
def get_user_similarity(target_user, user_similarity_df, top_n=5):
    similar_scores = user_similarity_df[target_user].sort_values(ascending=False)
    similar_users = similar_scores.iloc[1:top_n+1].index.tolist()
    return similar_users

In [38]:
# Function to get similar items
def get_similar_items(songID, item_similarity_df, top_n=5):
    similar_scores = item_similarity_df[songID].sort_values(ascending=False)
    similar_items = similar_scores.iloc[1:top_n+1].index
    return similar_items

In [39]:
# Function to recommend songs using user-based CF
def recommend_songs_ubcf(userID, user_item_matrix, user_similarity_df, num_recs=5):
    similar_users = get_user_similarity(userID, user_similarity_df)
    recommended_songs = []
    for similar_userID in similar_users:
        for songID, rating in user_item_matrix.loc[similar_userID].items():
            if rating > 0:
                recommended_songs.append((songID, rating))
    recommended_songs = sorted(recommended_songs, key=lambda x: x[1], reverse=True)
    return [songID for songID, _ in recommended_songs[:num_recs]]

In [40]:
# Function to recommend songs using item-based CF
def recommend_songs_ibcf(userID, user_item_matrix, item_similarity_df, num_recs=5):
    user_ratings = user_item_matrix.loc[userID]
    recommended_songs = pd.Series(dtype=float)
    for songID, rating in user_ratings.items():
        if rating > 0:
            similar_items = get_similar_items(songID, item_similarity_df)
            for similar_item in similar_items:
                if similar_item in recommended_songs:
                    recommended_songs[similar_item] += rating
                else:
                    recommended_songs[similar_item] = rating
    recommended_songs = recommended_songs.sort_values(ascending=False)
    return recommended_songs.head(num_recs).index

In [41]:
# Hybrid recommendation function
def hybrid_recommendations(target_user, user_item_matrix, user_similarity_df, item_similarity_df, top_n=5, weight_ubcf=0.5, weight_ibcf=0.5):
    ubcf_recs = recommend_songs_ubcf(target_user, user_item_matrix, user_similarity_df, num_recs=top_n*2)
    ibcf_recs = recommend_songs_ibcf(target_user, user_item_matrix, item_similarity_df, num_recs=top_n*2)

    # Create a weighted score for hybrid recommendations
    recommendations = pd.Series(dtype=float)

    for song in ubcf_recs:
        if song in recommendations:
            recommendations[song] += weight_ubcf
        else:
            recommendations[song] = weight_ubcf

    for song in ibcf_recs:
        if song in recommendations:
            recommendations[song] += weight_ibcf
        else:
            recommendations[song] = weight_ibcf

    recommendations = recommendations.sort_values(ascending=False)
    return recommendations.head(top_n).index

In [42]:
# Test the hybrid model
target_user = int(input("Enter user ID: "))
recommended_songs = hybrid_recommendations(target_user, user_item_matrix, user_similarity_df, item_similarity_df)
print(f"Recommendations for user {target_user}: {recommended_songs}")

Enter user ID: 5
Recommendations for user 5: Index([131629, 11136, 89151, 24033, 32274], dtype='int64')
