<a href="https://colab.research.google.com/github/kainat25/DSA-pr--20SW061/blob/main/book_recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
# Load the raw book, rating and user tables from the working directory.
book_df = pd.read_csv('Books.csv')

# Work on a random sub-sample of the ratings to keep the later pivot small.
# A fixed random_state makes the sample (and every id/result derived from it)
# reproducible across kernel restarts; min() avoids a ValueError when the file
# holds fewer rows than the requested sample size.
_ratings_all = pd.read_csv('Ratings.csv')
ratings_df = _ratings_all.sample(n=min(40000, len(_ratings_all)), random_state=42)

user_df = pd.read_csv('Users.csv')

# Inner-join each sampled rating with the profile of the user who gave it.
user_rating_df = ratings_df.merge(user_df, on='User-ID')

# Collaborative Filtering

In [None]:
# Join books with the user ratings on ISBN, keep only the columns the
# recommender needs, and renumber the rows 0..n-1.
book_user_rating = (
    book_df
    .merge(user_rating_df, on='ISBN')
    .loc[:, ['ISBN', 'Book-Title', 'Book-Author', 'User-ID', 'Book-Rating']]
    .reset_index(drop=True)
)

In [None]:
# Give every distinct ISBN a dense integer id (0, 1, 2, ...) in order of
# first appearance, then attach that id to each rating row.
d = {isbn: idx for idx, isbn in enumerate(book_user_rating['ISBN'].unique())}
book_user_rating['unique_id_book'] = book_user_rating['ISBN'].map(d)


In [None]:
# Reshape to a user x book matrix: one row per User-ID, one column per
# unique_id_book, cell = Book-Rating. Pairs without a rating become 0.
users_books_pivot_matrix_df = (
    book_user_rating
    .pivot(index='User-ID', columns='unique_id_book', values='Book-Rating')
    .fillna(0)
)



In [None]:
# Preview the first rows of the user x book rating matrix
# (rows are User-ID values, columns are unique_id_book values).
users_books_pivot_matrix_df.head()

unique_id_book,0,1,2,3,4,5,6,7,8,9,...,9550,9551,9552,9553,9554,9555,9556,9557,9558,9559
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Convert the pivot table to a plain float ndarray for the SVD step.
# np.asarray accepts both the DataFrame and an already-converted array, so
# re-running this cell is harmless (`.values` raised AttributeError on a
# second run because an ndarray has no such attribute). dtype=float also
# guarantees the floating-point input that svds requires.
users_books_pivot_matrix_df = np.asarray(users_books_pivot_matrix_df, dtype=float)
users_books_pivot_matrix_df

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
from scipy.sparse.linalg import svds

# Number of latent factors (singular triplets) to keep in the truncated SVD.
NUMBER_OF_FACTORS_MF = 15

#Performs matrix factorization of the original user item matrix
# svds returns U (users x k), the k singular values as a 1-D vector,
# and Vt (k x books).
U, sigma, Vt = svds(users_books_pivot_matrix_df, k = NUMBER_OF_FACTORS_MF)

In [None]:
# svds returns the singular values as a 1-D vector; the reconstruction needs
# them as a diagonal matrix. Guard on ndim so that re-running this cell does
# not undo the conversion: np.diag applied to a 2-D array extracts its
# diagonal and would hand back the 1-D vector again.
if sigma.ndim == 1:
    sigma = np.diag(sigma)
sigma.shape

(15, 15)

In [None]:
# Low-rank reconstruction of the user x book matrix from its SVD factors:
# (U . sigma) . Vt, evaluated left to right.
all_user_predicted_ratings = U @ sigma @ Vt
all_user_predicted_ratings

array([[ 3.59166639e-32,  6.44170618e-21, -1.37991504e-32, ...,
         0.00000000e+00,  4.46752588e-18,  0.00000000e+00],
       [-6.85596679e-33,  3.00007063e-19, -9.71219024e-32, ...,
         0.00000000e+00, -6.05278616e-19,  0.00000000e+00],
       [-3.73142601e-33,  2.21270113e-21, -5.41291150e-33, ...,
         0.00000000e+00, -1.86897340e-18,  0.00000000e+00],
       ...,
       [ 9.22037932e-20, -1.00231081e-05,  1.74038361e-19, ...,
         0.00000000e+00, -7.46833774e-05,  0.00000000e+00],
       [ 3.52812807e-20, -6.87014646e-07,  3.31696250e-19, ...,
         0.00000000e+00, -8.03444602e-07,  0.00000000e+00],
       [ 3.77468452e-21,  3.19644223e-07,  6.43902567e-20, ...,
         0.00000000e+00,  1.72992889e-05,  0.00000000e+00]])

In [None]:
def top_cosine_similarity(data, book_id, top_n=10):
    """Return the row indexes of `data` most cosine-similar to row `book_id`.

    Parameters
    ----------
    data : 2-D numpy array
        One row per book, one column per feature.
    book_id : int
        Row index of the query book; must satisfy 0 <= book_id < data.shape[0].
    top_n : int, default 10
        Maximum number of indexes to return.

    Returns
    -------
    numpy array of row indexes ordered from most to least similar (the query
    row itself is normally first, since its similarity to itself is 1), or an
    empty list when `book_id` is out of bounds.

    Notes
    -----
    Rows with zero magnitude have no defined cosine similarity; they are
    ranked last instead of producing NaN scores and divide warnings.
    """
    # Reject negative ids too: numpy would silently wrap them to another row.
    if book_id < 0 or book_id >= data.shape[0]:
        print("Invalid book_id. It is out of bounds for the data array.")
        return []

    book_row = data[book_id, :]
    # Euclidean norm of every row (row-wise dot product with itself).
    magnitude = np.sqrt(np.einsum('ij, ij -> i', data, data))
    denominator = magnitude[book_id] * magnitude

    # The division can hit 0/0 for all-zero rows; silence it here and
    # overwrite those entries explicitly on the next line.
    with np.errstate(divide='ignore', invalid='ignore'):
        similarity = np.dot(book_row, data.T) / denominator
    similarity = np.where(denominator > 0, similarity, -np.inf)

    # Stable sort keeps tied scores in ascending index order (deterministic).
    sort_indexes = np.argsort(-similarity, kind='stable')
    return sort_indexes[:top_n]

def similar_books(book_user_rating, book_id, top_indexes):
    """Print the title of `book_id` followed by the titles of `top_indexes`.

    Parameters
    ----------
    book_user_rating : pandas.DataFrame
        Must contain the columns 'unique_id_book' and 'Book-Title'.
    book_id : int
        unique_id_book of the query book.
    top_indexes : sequence of int (list or numpy array)
        unique_id_book values of the recommended books, in display order.

    Returns
    -------
    None. Output is printed; a notice is printed instead when `top_indexes`
    is empty or None.

    Raises
    ------
    IndexError
        If `book_id` or any entry of `top_indexes` has no row in
        `book_user_rating`.
    """
    # len() works for lists and numpy arrays alike; `not top_indexes` raises
    # ValueError for an array holding more than one element.
    if top_indexes is None or len(top_indexes) == 0:
        print("No recommendations available for the given book_id.")
        return

    def _title_of(unique_id):
        # First title recorded for this book id.
        matches = book_user_rating[book_user_rating.unique_id_book == unique_id]
        return matches['Book-Title'].values[0]

    print('Recommendations for {0}:\n'.format(_title_of(book_id)))

    for recommended_id in top_indexes:
        print(_title_of(recommended_id))

In [None]:
# Use every latent factor produced by svds (Vt has NUMBER_OF_FACTORS_MF rows,
# so the previous k = 50 was silently capped to that many columns anyway).
k = NUMBER_OF_FACTORS_MF
# Query book: must be a valid unique_id_book, i.e. 0 <= book_id < number of
# distinct books. The previous hard-coded 25954 was out of range.
book_id = 0
top_n = 3

# One row per book, one column per latent factor: shape (n_books, k)
sliced = Vt.T[:, :k]  # representative data

# list(...) hands similar_books a plain Python sequence, whose emptiness can
# be tested safely (a multi-element numpy array cannot be used in `not ...`).
top_indexes = list(top_cosine_similarity(sliced, book_id, top_n))
similar_books(book_user_rating, book_id, top_indexes)


Invalid book_id. It is out of bounds for the data array.
No recommendations available for the given book_id.
