In [83]:
# Data source (Kaggle "Books Dataset"): https://www.kaggle.com/datasets/saurabhbagchi/books-dataset?select=books_data

import pandas as pd

# Read the book catalogue, discard the image-URL and publisher columns, and
# switch to short snake_case column names -- all in one chained expression so
# re-running the cell always rebuilds `books` from the file.
books = (
    pd.read_csv(
        "books.csv",
        encoding="latin1",
        quotechar='"',
        on_bad_lines="skip",  # malformed rows are dropped instead of raising
        low_memory=False,
    )
    .drop(columns=["Image-URL-S", "Image-URL-M", "Image-URL-L", "Publisher"])
    .rename(
        columns={
            "ISBN": "isbn",
            "Book-Title": "title",
            "Book-Author": "author",
            "Year-Of-Publication": "pub_year",
        }
    )
)

In [84]:
# Display the books DataFrame after the column drop/rename above
# (a bare last expression is rendered by the notebook).
books

Unnamed: 0,isbn,title,author,pub_year
0,0195153448,Classical Mythology,Mark P. O. Morford,2002
1,0002005018,Clara Callan,Richard Bruce Wright,2001
2,0060973129,Decision in Normandy,Carlo D'Este,1991
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999
...,...,...,...,...
271259,0440400988,There's a Bat in Bunk Five,Paula Danziger,1988
271260,0525447644,From One to One Hundred,Teri Sloat,1991
271261,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004
271262,0192126040,Republic (World's Classics),Plato,1996


In [85]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Build the title index: each title becomes a TF-IDF vector over single words
# and adjacent word pairs (n-grams of length 1 and 2).
title_ngram_range = (1, 2)
vectorizer = TfidfVectorizer(ngram_range=title_ngram_range)

# Learn the vocabulary from the titles and vectorize them in one pass;
# row i of `tfidf` corresponds to row i of `books`.
tfidf = vectorizer.fit_transform(books["title"])

In [86]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(title, top_n=5):
    """Return the books whose titles are most similar to ``title``.

    The query is vectorized with the already-fitted module-level
    ``vectorizer`` and compared against the ``tfidf`` title matrix using
    cosine similarity.

    Parameters
    ----------
    title : str
        Free-text title (or fragment) to look up.
    top_n : int, default 5
        Maximum number of matches to return. Clamped to the number of
        indexed titles.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``books``, ordered from most to least similar.
    """
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Never ask for more rows than exist; argpartition raises otherwise.
    top_n = min(top_n, similarity.size)
    if top_n <= 0:
        return books.iloc[np.array([], dtype=int)]

    # argpartition picks the top-n scores in linear time but leaves them in
    # arbitrary order, so the candidates are explicitly sorted afterwards.
    candidates = np.argpartition(similarity, -top_n)[-top_n:]
    ranked = candidates[np.argsort(similarity[candidates])[::-1]]
    return books.iloc[ranked]

In [87]:
import ipywidgets as widgets
from IPython.display import display

# Text box for the title query, pre-filled with a sample title.
book_input = widgets.Text(description="Book Title", value="Goat Brothers", disabled=False)

# Output area that on_type clears and writes search results into.
book_list = widgets.Output()

def on_type(data):
    """Observer callback: refresh the result list for the newly typed title.

    ``data`` is the change dict passed by the widget; its ``"new"`` entry is
    the current text. The output area is always cleared, and a search is only
    run once the text is longer than five characters.
    """
    with book_list:
        book_list.clear_output()
        typed = data["new"]
        if len(typed) <= 5:
            # Too short to search; leave the cleared output empty.
            return
        display(search(typed))

# Call on_type whenever the text box's `value` trait changes.
book_input.observe(on_type, names='value')

# Render the input box followed by the results area.
display(book_input, book_list)

Text(value='Goat Brothers', description='Book Title')

Output()

In [88]:
# Read the user ratings and rename the columns to short identifiers in a
# single chained expression, so the cell is safe to re-run.
ratings = (
    pd.read_csv(
        "ratings.csv",
        encoding="latin1",
        quotechar='"',
        on_bad_lines="skip",  # malformed rows are dropped instead of raising
        low_memory=False,
    )
    .rename(columns={"User-ID": "userId", "ISBN": "isbn", "Book-Rating": "rating"})
)


In [89]:
# ISBN of the book to find similar readers for (kept as a string so the
# leading zero is preserved).
isbn = "0553571338"
# Rating threshold for the filters in the following cells.
# NOTE(review): those filters compare with a strict `>`, so a rating exactly
# equal to this value is excluded -- confirm whether `>=` was intended.
minimum_rating = 7

In [90]:
# Users who rated the chosen book above the threshold.
rated_target_highly = (ratings["isbn"] == isbn) & (ratings["rating"] > minimum_rating)
similar_readers = ratings.loc[rated_target_highly, "userId"].unique()

In [91]:
# Display the array of matching user ids.
similar_readers

array([  7841,  27769,  64185,  74026, 203240, 264688])

In [92]:
# Every above-threshold rating given by one of the similar readers.
from_similar_reader = ratings["userId"].isin(similar_readers)
above_threshold = ratings["rating"] > minimum_rating
similar_reader_recs = ratings[from_similar_reader & above_threshold]

In [93]:
# Display the ratings selected for the similar readers.
similar_reader_recs

Unnamed: 0,userId,isbn,rating
33056,7841,0060987103,9
33058,7841,0062509594,9
33059,7841,0140185216,10
33060,7841,014034294X,8
33061,7841,0140348107,10
...,...,...,...
1102917,264688,0553571338,9
1102918,264688,0590216880,10
1102919,264688,0590997289,8
1102920,264688,0698119002,10
