In [6]:
import pandas as pd
import numpy as np

import os

from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

from transformers import pipeline

import gradio as gr

# Directory of the persisted Chroma vector store; passed as `persist_directory`
# when the store is opened below.
PERSIST_DIR = '../data/db_books_embeddings'

In [30]:
# Read the books table from CSV. Both ISBN columns are forced to `str` so
# pandas does not infer a numeric dtype for them.
books = pd.read_csv(
    "../data/books_for_dashboard.csv",
    dtype=dict(isbn13=str, isbn=str),
)

In [8]:
# Load vector database for semantic recommendations
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

db_books = Chroma(
    embedding_function=embedding_model,
    persist_directory=PERSIST_DIR
)

# NOTE(review): `_collection` is a private attribute of the Chroma wrapper and
# may change between releases; it is used here only for a sanity check.
count = db_books._collection.count()

# Opening a directory that holds no embeddings does not appear to fail by
# itself, so an empty store must be detected explicitly. Without this check the
# cell would report success with 0 documents and then die with an opaque
# IndexError on the sample query below.
if count == 0:
    raise RuntimeError(
        f"Vector database at '{PERSIST_DIR}' contains no documents; "
        "check the path or rebuild the embeddings before continuing."
    )
print(f"Successfully loaded database with {count} documents from '{PERSIST_DIR}'.")

# Example query to show it works
results = db_books.similarity_search("Fantasy", k=1)
if results:
    print(f"Query Result: {results[0].page_content}")
else:
    print("Query Result: <no document returned>")



Successfully loaded database with 5197 documents from '../data/db_books_embeddings'.
Query Result: Returning to his hometown of Bramwell after years of wandering, mercenary Darrick Lang discovers that a dark and horrifying force has ensnared its citizens and now seeks to seize him, in a chilling novel of dark fantasy, based on the popular video game. Original. (A Blizzard Entertainment M-rated electronic game) (Horror)


In [17]:
# Derive a larger cover image for every book. Rows without a thumbnail fall
# back to a local placeholder image; all others get a size hint appended to
# the thumbnail URL.
# Fix: the suffix was "&wfife=w800" (stray "w"); the parameter is presumably
# `fife`, where `fife=w800` requests an 800px-wide image — TODO confirm against
# a real thumbnail URL.
books['large_thumbnail'] = np.where(
    books['thumbnail'].isna(),
    "../data/cover_not_found.png",
    books['thumbnail'] + "&fife=w800",
)

In [53]:
def retrieve_semantic_recommendations(
    query: str,
    category: str = None,
    tone: str = None,
    initial_top_k: int = 50,
    final_top_k: int = 16
) -> pd.DataFrame:
    """Return book rows that semantically match ``query``.

    The vector store ``db_books`` is asked for the ``initial_top_k`` closest
    documents; their ids are matched against ``books['isbn13']``. The matches
    are optionally narrowed to one category, cut down to the ``final_top_k``
    most similar books, and finally ordered for display.

    Parameters
    ----------
    query : str
        Free-text description of the kind of book wanted.
    category : str, optional
        Value of ``simple_categories`` to keep. ``None`` or ``"All"`` disables
        the category filter.
    tone : str, optional
        One of the keys of the tone table below (e.g. ``"Happy"``). The result
        is sorted by the corresponding emotion column, highest first. Any
        other value (including ``None``) sorts by ``title_and_subtitle``
        in descending order instead.
    initial_top_k : int
        Number of candidates requested from the vector store.
    final_top_k : int
        Maximum number of rows returned.

    Returns
    -------
    pd.DataFrame
        A new frame (the global ``books`` is not modified) with at most
        ``final_top_k`` rows.
    """
    tone_emotion_dict = dict(
        Happy='joy',
        Surprising='surprise',
        Angry='anger',
        Suspensful='fear',    # original (misspelled) key kept for existing callers
        Suspenseful='fear',   # correctly spelled variant accepted as well
        Sad='sadness'
    )

    recommendations = db_books.similarity_search(query, k=initial_top_k)

    # Remember each id's position in the similarity ranking (best match = 0);
    # `setdefault` keeps the best position if an id appears more than once.
    similarity_rank = {}
    for position, doc in enumerate(recommendations):
        similarity_rank.setdefault(doc.id, position)

    book_recs = books[books["isbn13"].isin(list(similarity_rank))]

    # `None` (the default) means "no filter", exactly like the "All" option.
    if category not in (None, "All"):
        book_recs = book_recs[book_recs["simple_categories"] == category]

    # Selecting rows from `books` loses the similarity order, so restore it
    # before truncating; otherwise `final_top_k` would keep arbitrary rows.
    book_recs = book_recs.sort_values(
        "isbn13",
        key=lambda ids: ids.map(similarity_rank),
        kind="stable",
    ).head(final_top_k)

    # Display order: strongest requested emotion first, or the title column
    # when no known tone was requested.
    sort_column = tone_emotion_dict.get(tone, 'title_and_subtitle')
    return book_recs.sort_values(sort_column, ascending=False)


# Backward-compatible alias for the original, misspelled function name.
retrieve_semantic_recomendations = retrieve_semantic_recommendations

In [None]:
def recommend_books(
    query: str,
    category: str,
    tone: str
):
    recommendations = retrieve_semantic_recommendations(query, category, tone)
    
    results = []
    for _, row in recommendations.iterrows():