In [16]:
import pandas as pd
import numpy as np
from dotenv import load_dotenv

from langchain_community.document_loaders import TextLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

import gradio as gr

# Load environment variables from a local .env file.
# NOTE(review): presumably this supplies the API key used by the embeddings
# client below — confirm against your deployment.
load_dotenv()

# Load and process books data
books = pd.read_csv("books_with_emotions.csv")
# Build a second cover URL with "&fife=w800" appended (presumably requesting a
# larger image — confirm with the thumbnail provider). Rows with no thumbnail
# yield NaN from the concatenation and fall back to a local placeholder image.
books["large_thumbnail"] = books["thumbnail"] + "&fife=w800"
books["large_thumbnail"] = np.where(
    books["large_thumbnail"].isna(),
    "cover-not-found.jpg",
    books["large_thumbnail"],
)

# Load and process documents
raw_documents = TextLoader("tagged_description.txt", encoding="utf-8").load()

text_splitter = CharacterTextSplitter(separator="\n", chunk_size=10000, chunk_overlap=0)

# Build exactly ONE document per non-blank line of the description file.
# retrieve_semantic_recommendations() reads only the leading ISBN of each
# search hit, so every chunk must describe a single book. Calling
# split_documents() with chunk_size=10000 would merge many consecutive lines
# into one chunk, making every book except the first in each chunk
# unreachable. Splitting on "\n" ourselves (the same separator as before) and
# handing single lines to the splitter keeps chunks at one line each.
# Assumes tagged_description.txt holds one "<isbn13> <description>" entry per
# line — TODO confirm against the file that generates it.
line_records = [
    (line, raw_document.metadata)
    for raw_document in raw_documents
    for line in raw_document.page_content.split("\n")
    if line.strip()
]
documents = text_splitter.create_documents(
    [text for text, _ in line_records],
    metadatas=[metadata for _, metadata in line_records],
)

# Embed every document and index it in a Chroma vector store. No persistence
# arguments are passed here, so this work is repeated on each start of the script.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
db_books = Chroma.from_documents(documents, embeddings)


def retrieve_semantic_recommendations(
        query: str,
        category: str = None,
        tone: str = None,
        initial_top_k: int = 50,
        final_top_k: int = 16,
) -> pd.DataFrame:
    """Return the books that best match ``query``, optionally filtered and re-ranked.

    Args:
        query: Free-text description used for the vector similarity search.
        category: Value of ``simple_categories`` to keep. ``None`` or ``"All"``
            disables the category filter.
        tone: One of ``"Happy"``, ``"Surprising"``, ``"Angry"``,
            ``"Suspenseful"`` or ``"Sad"``; the result is then sorted by the
            matching emotion column, highest first. Any other value
            (including ``None`` and ``"All"``) keeps the similarity order.
        initial_top_k: Number of hits requested from the vector store.
        final_top_k: Maximum number of rows returned.

    Returns:
        A slice of the module-level ``books`` frame with at most
        ``final_top_k`` rows.

    Raises:
        ValueError, IndexError: If a search hit does not start with an
            integer token (the ISBN) after stripping surrounding quotes.
    """
    recs = db_books.similarity_search(query, k=initial_top_k)

    # Each hit is expected to begin with the book's isbn13, possibly wrapped
    # in double quotes. dict.fromkeys de-duplicates while keeping hit order.
    ranked_isbns = list(dict.fromkeys(
        int(rec.page_content.strip('"').split()[0]) for rec in recs
    ))
    similarity_rank = {isbn: position for position, isbn in enumerate(ranked_isbns)}

    # Boolean indexing returns rows in the CSV's order, not in similarity
    # order, so re-sort by hit position before any truncation; otherwise
    # head() would keep an arbitrary subset instead of the closest matches.
    book_recs = books[books["isbn13"].isin(ranked_isbns)]
    book_recs = book_recs.sort_values(
        by="isbn13",
        key=lambda isbns: isbns.map(similarity_rank),
        kind="stable",
    ).head(initial_top_k)

    # None (the declared default) means "no filter", the same as "All".
    if category is not None and category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category]
    book_recs = book_recs.head(final_top_k)

    tone_to_column = {
        "Happy": "joy",
        "Surprising": "surprise",
        "Angry": "anger",
        "Suspenseful": "fear",
        "Sad": "sadness",
    }
    emotion_column = tone_to_column.get(tone)
    if emotion_column is not None:
        # Stable sort: books with equal scores stay in similarity order.
        book_recs = book_recs.sort_values(
            by=emotion_column, ascending=False, kind="stable"
        )

    return book_recs


def _truncate_description(description, max_words: int = 30) -> str:
    """Return at most ``max_words`` words, adding "..." only if text was cut."""
    if pd.isna(description):
        return "No description available."
    words = str(description).split()
    if len(words) <= max_words:
        return " ".join(words)
    return " ".join(words[:max_words]) + "..."


def _format_authors(authors) -> str:
    """Turn a ';'-separated author field into a readable, comma/and-joined string."""
    if pd.isna(authors):
        return "Unknown author"
    names = [name.strip() for name in str(authors).split(";") if name.strip()]
    if not names:
        return "Unknown author"
    if len(names) == 1:
        return names[0]
    if len(names) == 2:
        return f"{names[0]} and {names[1]}"
    return f"{', '.join(names[:-1])}, and {names[-1]}"


def recommend_books(
        query: str,
        category: str,
        tone: str
):
    """Build the gallery entries shown for a user's search.

    Args:
        query: Free-text description entered by the user.
        category: Category choice forwarded to
            ``retrieve_semantic_recommendations``.
        tone: Emotional-tone choice forwarded to
            ``retrieve_semantic_recommendations``.

    Returns:
        A list of ``(image, caption)`` tuples, one per recommended book.
        A blank query yields an empty list. If anything fails, a single
        placeholder entry carrying the error message is returned instead of
        raising, so the UI callback never crashes.
    """
    # Nothing to search for: skip the embedding/vector-store round trip.
    if not query or not query.strip():
        return []

    try:
        recommendations = retrieve_semantic_recommendations(query, category, tone)
        results = []

        for _, row in recommendations.iterrows():
            description_text = _truncate_description(row["description"])
            authors_str = _format_authors(row["authors"])

            caption = f"{row['title']} by {authors_str}: {description_text}"
            results.append((row["large_thumbnail"], caption))

        return results

    except Exception as e:
        # Deliberate catch-all at the UI boundary: report the problem in the
        # gallery instead of letting the callback raise.
        print(f"Error in recommend_books: {e}")
        return [("cover-not-found.jpg", f"Error occurred: {str(e)}")]


# Prepare dropdown options
# The tone choices are fixed literals that cannot fail, so they live outside
# the try block: a problem computing the categories must not also strip the
# tone dropdown down to "All".
tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

try:
    # "All" first, then every distinct non-null category in sorted order.
    categories = ["All"] + sorted(books["simple_categories"].dropna().unique())
except Exception as e:
    # Best-effort fallback: keep the UI usable with only the "All" choice.
    print(f"Error preparing categories: {e}")
    categories = ["All"]

# Build the Gradio dashboard: query box, two filter dropdowns, action buttons
# and a gallery for the results. Components appear in the order created here.
with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown("# Semantic Book Recommender")
    gr.Markdown("Find books based on semantic similarity and emotional tone!")

    # Free-text search input
    with gr.Row():
        user_query = gr.Textbox(
            lines=2,
            placeholder="e.g., A story about forgiveness",
            label="Please enter a description of a book:",
        )

    # Filters, both defaulting to "All"
    with gr.Row():
        category_dropdown = gr.Dropdown(
            label="Select a category:",
            choices=categories,
            value="All",
        )
        tone_dropdown = gr.Dropdown(
            label="Select an emotional tone:",
            choices=tones,
            value="All",
        )

    # Action buttons
    with gr.Row():
        submit_button = gr.Button("Find Recommendations", variant="primary")
        clear_button = gr.Button("Clear", variant="secondary")

    # Results area
    gr.Markdown("## Recommendations")
    output = gr.Gallery(
        label="Recommended books",
        height="auto",
        columns=4,
        rows=4,
    )

    def _reset_form():
        """Return the initial values for the query, both dropdowns and the gallery."""
        return ("", "All", "All", [])

    # Event handlers
    submit_button.click(
        fn=recommend_books,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=output,
    )

    clear_button.click(
        fn=_reset_form,
        inputs=[],
        outputs=[user_query, category_dropdown, tone_dropdown, output],
    )

if __name__ == "__main__":
    # Serve the dashboard on the loopback interface only.
    launch_options = {
        "share": False,  # Set to True if you want to create a public link
        "server_name": "127.0.0.1",
        "server_port": 7860,
    }
    dashboard.launch(**launch_options)

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.
