# Fifth and final part of the Book Recommender System

In [None]:
# for Google Colab install using below commands
!pip install gradio
!pip install langchain_community
!pip install langchain_openai
!pip install langchain_text_splitters
!pip install langchain_chroma
!pip install dotenv
!pip install --upgrade numpy
# !pip install numpy==1.25.2 --quiet # In NumPy 1.26.x versions, some internal checks related to numpy.rec are broken in certain environments like Colab.

Collecting gradio
  Downloading gradio-5.22.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [None]:
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma
import gradio as gr
from google.colab import drive
from google.colab import userdata
import os

# Copy the OpenAI key from Colab's secret store into the process environment,
# where the OpenAI client libraries look for it.
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")

# Make the Google Drive files (dataset, tagged descriptions, placeholder cover)
# reachable under /content/drive.
drive.mount("/content/drive")

# Also pick up variables from a .env file when one exists.
load_dotenv()

Mounted at /content/drive


False

In [None]:
# The CSV also holds a cover link for each book, but the covers come in different sizes.
books = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Book Recommender/books_with_emotions.csv")

# Books that have a thumbnail get the "&fife=w800" suffix, which asks for the
# largest cover size available; books without a cover fall back to a
# placeholder image so the user still sees something.
books["large_thumbnail"] = np.where(
    books["thumbnail"].notna(),
    books["thumbnail"] + "&fife=w800",
    "/content/drive/MyDrive/Colab Notebooks/Book Recommender/cover-not-found.jpg",
)

In [None]:
# Load the tagged descriptions and embed them into a Chroma vector store.
# The file is split on newlines so that every line becomes its own document
# (presumably one book per line, prefixed with its isbn13 - see how the
# search results are parsed in retrieve_semantic_recommendations).
raw_documents = TextLoader("/content/drive/MyDrive/Colab Notebooks/Book Recommender/tagged_description.txt").load()

# chunk_size=1 forces a split at every separator, exactly like the previous
# chunk_size=0 did, but stays valid on newer langchain_text_splitters releases
# that reject non-positive chunk sizes. The "chunk longer than specified"
# warnings this produces are expected.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db_books = Chroma.from_documents(documents, OpenAIEmbeddings())

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [None]:
# Retrieve semantic recommendations from our books dataset, then apply
# filtering based on category and sorting based on tone.
def retrieve_semantic_recommendations(
        query: str,
        category: str = None,
        tone: str = None,
        initial_top_k: int = 50,
        final_top_k: int = 16,
) -> pd.DataFrame:
    """Return the books that best match ``query``, optionally filtered and sorted.

    Args:
        query: Free-text description of the kind of book wanted.
        category: Value of ``simple_categories`` to keep. ``None`` or ``"All"``
            disables the category filter.
        tone: One of "Happy", "Surprising", "Angry", "Suspenseful" or "Sad" to
            sort the final selection by the matching emotion column
            (descending). Any other value (e.g. ``None`` or ``"All"``) keeps
            the similarity order.
        initial_top_k: Number of candidates requested from the vector store.
        final_top_k: Maximum number of rows returned.

    Returns:
        A new DataFrame with at most ``final_top_k`` rows of ``books``.
    """
    recs = db_books.similarity_search(query, k=initial_top_k)

    # Every stored chunk starts with the book's isbn13, optionally wrapped in quotes.
    ranked_isbns = [int(rec.page_content.strip('"').split()[0]) for rec in recs]

    # isbn13 -> position in the similarity ranking; setdefault keeps the best
    # (first) position if the same isbn13 shows up more than once.
    similarity_rank = {}
    for position, isbn in enumerate(ranked_isbns):
        similarity_rank.setdefault(isbn, position)

    # The isin() filter yields rows in the order of `books`, not in similarity
    # order, so restore the ranking before truncating; otherwise head() would
    # keep whichever matches happen to come first in the CSV.
    book_recs = books[books["isbn13"].isin(list(similarity_rank))]
    rank_order = book_recs["isbn13"].map(similarity_rank).to_numpy().argsort(kind="stable")
    book_recs = book_recs.iloc[rank_order].head(initial_top_k)

    # None (the default) and "All" both mean "do not filter by category".
    if category is not None and category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category]
    book_recs = book_recs.head(final_top_k)

    # Sort only by the emotion the user actually asked for.
    tone_column = {
        "Happy": "joy",
        "Surprising": "surprise",
        "Angry": "anger",
        "Suspenseful": "fear",
        "Sad": "sadness",
    }.get(tone)
    if tone_column is not None:
        # Non-inplace sort: book_recs is a slice of `books`, and sorting a
        # slice in place triggers pandas' SettingWithCopyWarning.
        book_recs = book_recs.sort_values(by=tone_column, ascending=False)

    return book_recs

In [None]:
def _format_authors(authors) -> str:
    """Turn a ';'-separated author string into a readable list of names."""
    # A missing value comes out of pandas as NaN (a float), not as a string.
    if not isinstance(authors, str):
        return "Unknown author"
    names = authors.split(";")
    if len(names) == 2:  # two authors
        return f"{names[0]} and {names[1]}"
    if len(names) > 2:  # more than two authors
        return f"{', '.join(names[:-1])}, and {names[-1]}"
    return authors  # a single author


def _truncate_words(text, max_words: int = 30) -> str:
    """Keep the first ``max_words`` words of ``text``, adding '...' only if cut."""
    if not isinstance(text, str):
        return ""
    words = text.split()
    if len(words) <= max_words:
        return " ".join(words)
    return " ".join(words[:max_words]) + "..."


def recommend_books(
        query: str,
        category: str,
        tone: str
):
    """Build the gallery entries for the books recommended for ``query``.

    Args:
        query: Free-text description of the kind of book wanted.
        category: Category filter passed on to the retrieval step.
        tone: Emotional tone passed on to the retrieval step.

    Returns:
        A list of ``(thumbnail, caption)`` tuples, one per recommended book.
        The caption is "<title> by <authors>: <first 30 words of description>".
    """
    recommendations = retrieve_semantic_recommendations(query, category, tone)
    results = []

    # Looping over each recommendation.
    for _, row in recommendations.iterrows():
        # Only the first 30 words of the description are displayed.
        truncated_description = _truncate_words(row["description"])
        authors_str = _format_authors(row["authors"])

        # Final caption to be displayed; the description part is left out
        # entirely when the book has no description.
        caption = f"{row['title']} by {authors_str}"
        if truncated_description:
            caption = f"{caption}: {truncated_description}"
        results.append((row["large_thumbnail"], caption))  # tuple of thumbnail and caption
    return results

# Dropdown choices: "All" (no filtering) followed by every distinct category in the dataset.
# dropna() keeps sorted() from raising on a str/NaN comparison should any book lack a category.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique())  # for categories
tones = ["All"] + ["Happy", "Surprising", "Angry", "Suspenseful", "Sad"]  # for emotions

In [None]:
# Quick check of the full pipeline before wiring it into the dashboard.
recommend_books(query='Book on love', category='Fiction', tone='Sad')

[('http://books.google.com/books/content?id=170LAQAAMAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api&fife=w800',
  'Women by Charles Bukowski: Tells the story of an ugly middle-aged man who has gone unloved for too long, but a change comes over him at the age of fifty as he begins more...'),
 ('http://books.google.com/books/content?id=wXGK6gBEULcC&printsec=frontcover&img=1&zoom=1&source=gbs_api&fife=w800',
  'The Amateur Marriage by Anne Tyler: Marrying quickly during World War II after falling in love at first sight, a mismatched couple discovers that their very different personalities and approaches to life are taking a toll...'),
 ('http://books.google.com/books/content?id=-Il7XPFUAbgC&printsec=frontcover&img=1&zoom=1&source=gbs_api&fife=w800',
  "The History of Love: A Novel by Nicole Krauss: Sixty years after a book's publication, its author remembers his lost love and missing son, while a teenage girl, named for one of the book's characters, seeks her namesake, as..."),
 ('ht

In [None]:
# Dashboard built with Gradio (served on localhost and opened in the browser).
# See the Gradio website for themes and further customization.
with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown("# Semantic book recommender")

    # Input row: free-text query, the two filters and the submit button.
    with gr.Row():
        user_query = gr.Textbox(
            label="Please enter a description of a book:",
            placeholder="e.g., A story about forgiveness",
        )
        category_dropdown = gr.Dropdown(
            choices=categories,
            label="Select a category:",
            value="All",
        )
        tone_dropdown = gr.Dropdown(
            choices=tones,
            label="Select an emotional tone:",
            value="All",
        )
        submit_button = gr.Button("Find recommendations")

    gr.Markdown("## Recommendations")
    # 8 columns x 2 rows = 16 tiles, which is why 16 recommendations are retrieved.
    output = gr.Gallery(
        label="Recommended books",
        columns=8,
        rows=2,
    )

    # Clicking the button runs recommend_books on the three inputs and shows
    # the returned (thumbnail, caption) pairs in the gallery.
    submit_button.click(
        fn=recommend_books,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=output,
    )

In [None]:
# Start the dashboard only when this notebook/script is the entry point.
if __name__ == "__main__":
    # share=True ensures a public link is generated;
    # debug=True keeps the cell running so errors and logs stay visible.
    dashboard.launch(
        share=True,
        debug=True,
    )

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://8b90536f40c984d32e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://8b90536f40c984d32e.gradio.live
