# Recommendation System for books

In [23]:
import numpy as np
import pandas as pd

In [24]:
books = pd.read_csv('book.csv')

In [25]:
books.head()

Unnamed: 0.1,Unnamed: 0,User.ID,Book.Title,Book.Rating
0,1,276726,Classical Mythology,5
1,2,276729,Clara Callan,3
2,3,276729,Decision in Normandy,6
3,4,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,5,276737,The Mummies of Urumchi,6


In [26]:
books.drop('Unnamed: 0', axis =1, inplace = True)

In [27]:
books.head()

Unnamed: 0,User.ID,Book.Title,Book.Rating
0,276726,Classical Mythology,5
1,276729,Clara Callan,3
2,276729,Decision in Normandy,6
3,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,276737,The Mummies of Urumchi,6


In [32]:
books['Book.Title'].value_counts()

Book.Title
Fahrenheit 451                                              5
Charlie and the Chocolate Factory                           4
The Subtle Knife (His Dark Materials, Book 2)               4
Vanished                                                    4
Ender's Game (Ender Wiggins Saga (Paperback))               4
                                                           ..
Murder on St. Mark's Place (Gaslight Mysteries)             1
State of Grace                                              1
Valsalva's Maneuver: Mots Justes and Indispensable Terms    1
I love you, I hate you                                      1
Kids Say the Darndest Things                                1
Name: count, Length: 9659, dtype: int64

In [33]:
len(books['Book.Title'].unique())

9659

In [34]:
len(books['User.ID'].unique())

2182

In [35]:
books.shape

(10000, 3)

This means that only as very small percentage of books are read by multiple peoples!

In [36]:
from scipy.sparse import csr_matrix
from sklearn.preprocessing import LabelEncoder

In [37]:
user_encoder = LabelEncoder()
book_encoder = LabelEncoder()

In [38]:
book_encoder.fit(books['Book.Title'])

In [39]:
books['Book.Title'] = book_encoder.transform(books['Book.Title'])

In [40]:
book_encoder.transform(['Clara Callan'])

array([1440])

In [41]:
user_encoder.fit(books['User.ID'])

In [42]:
books['User.ID'] = user_encoder.transform(books['User.ID'])

In [43]:
user_item_matrix = csr_matrix((books['Book.Rating'], (books['User.ID'], books['Book.Title'])))

In [44]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute cosine similarity
cosine_sim = cosine_similarity(user_item_matrix, user_item_matrix)

In [45]:
def recommend(book_id, user_item_matrix, cosine_sim, book_encoder, user_encoder, top_n=5):
    # Get the cosine similarity scores for the given book
    sim_scores = cosine_sim[book_id]

    # Find indices of books with highest similarity scores
    similar_books = sim_scores.argsort()[::-1][1:top_n+1]  # Exclude the book itself (index 0)

    # Convert book IDs back to titles
    similar_books_titles = book_encoder.inverse_transform(similar_books)

    return similar_books_titles



In [46]:
# Example: Get recommendations for a book with ID 42 (replace with the actual book ID)
book_title = 'Clara Callan'
book_id = book_encoder.transform([book_title])
book_id

array([1440])

In [47]:
bi = int(book_id)

In [48]:
bi

1440

In [49]:
recommended_books = recommend(bi, user_item_matrix, cosine_sim, book_encoder, user_encoder)
print(f"Recommended books for book ID {book_title}: {recommended_books}")

Recommended books for book ID Clara Callan: ['Dragonsong (Harper Hall Trilogy)' 'AyÃºdate'
 'Axis Of Conflict   The Terror File'
 'Awful End (Eddie Dickens Trilogy S.)' 'Away from It All']
