In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Load the data
data = pd.read_csv("book.csv", encoding='latin-1')

# Pivot table to handle duplicate entries and create a user-item matrix
user_item_matrix = data.pivot_table(index='User.ID', columns='Book.Title', values='Book.Rating', aggfunc='mean').fillna(0)

print(data.head())
print(user_item_matrix[:5])

   Unnamed: 0  User.ID                                         Book.Title  \
0           1   276726                                Classical Mythology   
1           2   276729                                       Clara Callan   
2           3   276729                               Decision in Normandy   
3           4   276736  Flu: The Story of the Great Influenza Pandemic...   
4           5   276737                             The Mummies of Urumchi   

   Book.Rating  
0            5  
1            3  
2            6  
3            8  
4            6  
Book.Title   Jason, Madison &amp   Other Stories;Merril;1985;McClelland &amp  \
User.ID                                                                        
8                            0.0                                         0.0   
9                            0.0                                         0.0   
10                           0.0                                         0.0   
12                           0.0   

In [2]:
# Normalize the user-item matrix using Min-Max scaling
scaler = MinMaxScaler()
normalized_matrix = scaler.fit_transform(user_item_matrix)

# Compute cosine similarity
cosine_sim = cosine_similarity(normalized_matrix, normalized_matrix)

# Creating a DataFrame for easier manipulation
cosine_sim_df = pd.DataFrame(cosine_sim, index=user_item_matrix.index, columns=user_item_matrix.index)

cosine_sim_df.head()

User.ID,8,9,10,12,14,16,17,19,22,26,...,278831,278832,278836,278843,278844,278846,278849,278851,278852,278854
User.ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Function to get book recommendations based on cosine similarity
def get_recommendations(user_id, cosine_sim_df, user_item_matrix):
    similar_users = cosine_sim_df[user_id].sort_values(ascending=False).index[1:]
    user_ratings = user_item_matrix.loc[user_id]
    
    # Get the most similar user and find their books which are not rated by the current user
    recommendations = user_item_matrix.loc[similar_users[0]][user_ratings == 0]
    
    sorted_recommendations = recommendations.sort_values(ascending=False)
    return recommendations.index.tolist()

# Example: Get Top 10 recommendations for User.ID 276729
user_id = 276729
recommendations = get_recommendations(user_id, cosine_sim_df, user_item_matrix)
print(f"Recommended books for User.ID {user_id}: {recommendations[:10]}")

Recommended books for User.ID 276729: [' Jason, Madison &amp', ' Other Stories;Merril;1985;McClelland &amp', ' Repairing PC Drives &amp', "'48", "'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities", '...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR', '01-01-00: A Novel of the Millennium', '1,401 More Things That P*Ss Me Off', '10 Commandments Of Dating', '100 Great Fantasy Short, Short Stories']
