# Notebook cell marker left over from the Jupyter export: In [None]
import os
from typing import Optional
from dotenv import load_dotenv
from io import BytesIO
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
import chromadb



# Load environment variables before any of them are read below
load_dotenv()

# Credentials taken from the environment; each is None when its variable is unset
# NOTE(review): the constant is named ..._API_TOKEN but is read from
# HUGGINGFACEHUB_API_KEY -- confirm which name the deployment actually sets.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_KEY")
CHAINLIT_API_KEY = os.environ.get("CHAINLIT_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Location of the persisted embeddings database and the collection used in it
CHROMA_DATA_PATH = "./embeddings_database"
COLLECTION_NAME = "SearchEngine"

# Chat model instance created once at import time (temperature fixed at 0)
llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0)

def get_bert_embedding(text):
    """Return the first-token embedding of ``text`` as a plain Python list.

    The text is tokenized with ``bert_tokenizer`` and fed through
    ``bert_model``; the vector at position 0 of the last hidden state (the
    [CLS] position for BERT-style tokenizers) is used as the embedding.

    NOTE(review): ``bert_tokenizer`` and ``bert_model`` are module-level names
    that are not defined in the visible part of this file -- confirm they are
    created before this function is called.

    Args:
        text: Input handed straight to ``bert_tokenizer`` (with padding and
            truncation enabled).

    Returns:
        The embedding converted with ``tolist()``. For a batch of one this is
        a flat list of floats; for a larger batch ``squeeze(0)`` leaves the
        batch dimension in place and a nested list is returned.
    """
    # Imported locally because the file's import block does not bring torch in.
    import torch

    # Tokenize and encode the text as PyTorch tensors
    encoded_text = bert_tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Inference only: skip gradient tracking
    with torch.no_grad():
        outputs = bert_model(**encoded_text)

    # Keep only the first token's vector for every item in the batch
    first_token_embedding = outputs.last_hidden_state[:, 0, :]

    # Drop the batch dimension (when it is 1) and convert to nested lists
    return first_token_embedding.squeeze(0).tolist()


def retrieve_and_display_images(query, collection, top_k=3, results=None, df=None):
    """Return the ``image`` entries for the captions that best match ``query``.

    Despite its name, this function does not render anything: it only
    collects the image locations and leaves displaying them to the caller.

    Args:
        query: Text description to search for. Only used when ``results`` is
            not supplied.
        collection: Object exposing ``query(query_embeddings=..., n_results=...,
            include=...)``. Only used when ``results`` is not supplied.
        top_k: Number of matches to request from ``collection``.
        results: Optional pre-computed query response: a mapping whose
            ``"ids"`` entry holds one list of matched ids per query. When
            omitted, the collection is queried with the embedding of ``query``.
        df: Optional table with an ``image`` column, indexed by the ids
            stored in the collection. When omitted, a module-level ``df`` is
            used if one exists.
            NOTE(review): assumes the table's index uses the same id values
            as the collection -- confirm against the code that populates it.

    Returns:
        A list with the ``image`` value of every matched id, in ranking
        order. Empty when no table with an ``image`` column is available or
        when nothing matched.

    Raises:
        KeyError: If a matched id is missing from the table's index.
    """
    if results is None:
        query_embedding = get_bert_embedding(query)
        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
            include=["documents"],
        )

    # Fall back to a module-level DataFrame, which is what the body relied on
    # before ``df`` became a parameter.
    if df is None:
        df = globals().get("df")
    if df is None or "image" not in df.columns:
        return []

    # The response holds one id list per query embedding; exactly one
    # embedding is sent, so only the first list is relevant.
    id_batches = results.get("ids") or [[]]
    return [df.loc[doc_id, "image"] for doc_id in id_batches[0]]


@cl.password_auth_callback
def auth_callback(username: str, password: str) -> Optional[cl.AppUser]:
    """Authenticate a username/password pair against the admin credentials.

    The expected credentials are read from the ``CHAINLIT_ADMIN_USERNAME`` and
    ``CHAINLIT_ADMIN_PASSWORD`` environment variables. Both fall back to
    ``"admin"`` so existing setups keep working.
    NOTE(review): the fallback is a well-known default -- set both variables
    in any deployment that is reachable by others.

    Args:
        username: Username submitted in the login form.
        password: Password submitted in the login form.

    Returns:
        A ``cl.AppUser`` with the ``ADMIN`` role when both values match,
        otherwise ``None``.
    """
    # Local import: hmac is not part of the file's import block.
    import hmac

    expected_username = os.getenv("CHAINLIT_ADMIN_USERNAME", "admin")
    expected_password = os.getenv("CHAINLIT_ADMIN_PASSWORD", "admin")

    # Compare as bytes (compare_digest rejects non-ASCII str) and in constant
    # time so the check does not leak how many leading characters matched.
    username_ok = hmac.compare_digest(
        username.encode("utf-8"), expected_username.encode("utf-8")
    )
    password_ok = hmac.compare_digest(
        password.encode("utf-8"), expected_password.encode("utf-8")
    )

    if username_ok and password_ok:
        return cl.AppUser(username=expected_username, role="ADMIN", provider="credentials")
    return None


def _image_element(source, position):
    """Build an inline image element from a URL or a local file path."""
    location = str(source)
    # The stored value may be a URL or a file path; pick the matching keyword.
    kind = "url" if location.startswith(("http://", "https://")) else "path"
    return cl.Image(name=f"result_{position}", display="inline", **{kind: location})


@cl.on_chat_start
async def main():
    """Set up the vector store for the session and run one image search.

    Steps performed when a chat starts:
      1. Open the persisted Chroma database and wrap it with the LangChain
         ``Chroma`` store; the resulting retriever is stored in the user
         session under the key ``"retriever"``.
      2. Ask the user for an image description (re-asking while no reply is
         received).
      3. Look up matching images via ``retrieve_and_display_images`` and send
         them back attached to a message.
      4. Ask whether the user is satisfied and acknowledge a "YES".
    """
    # Open the vector DB and build the embedding function
    client = chromadb.PersistentClient(CHROMA_DATA_PATH)
    huggingface_embeddings = HuggingFaceEmbeddings(
        model_name="google-bert/bert-base-uncased",
        model_kwargs={"device": "cpu"},
    )

    # No text splitter is passed here: ``SentenceSplitter`` is not imported
    # anywhere in this file, and the LangChain Chroma wrapper takes no
    # ``text_splitter`` argument.
    langchain_chroma = Chroma(
        client=client,
        collection_name=COLLECTION_NAME,
        embedding_function=huggingface_embeddings,
    )
    retriever = langchain_chroma.as_retriever(search_kwargs={"k": 3}, search_type="similarity")
    cl.user_session.set("retriever", retriever)  # Store retriever for later use

    # Raw collection handle for the embedding-based lookup below
    collection = client.get_or_create_collection(COLLECTION_NAME)

    # Keep asking until a reply arrives (send() yields None when none is received)
    query = None
    while query is None:
        query = await cl.AskUserMessage(
            content="Please give a discription of the image ",
            author="Search Engine",
        ).send()

    if query:
        # NOTE(review): the reply text is read from the 'content' key, as in
        # the original code -- confirm against the installed Chainlit version.
        query_text = query['content']
        await cl.Message(
            content=f"Looking for an image of {query_text}..",
            author="Search Engine",
        ).send()

        # The lookup is synchronous; run it off the event loop
        image_urls = await cl.make_async(retrieve_and_display_images)(query_text, collection)

        if image_urls:
            elements = [
                _image_element(image_url, position)
                for position, image_url in enumerate(image_urls)
            ]
            await cl.Message(
                content="Here are images similar to your description:",
                elements=elements,
            ).send()
        else:
            await cl.Message(content="No matching images were found.").send()

    # =============================== Ask for an action ===============================
    res = await cl.AskActionMessage(
        content="Are you satisfied with this answer ?",
        actions=[
            cl.Action(name="YES", value="YES", label="YES"),
            cl.Action(name="NO", value="NO", label="NO"),
        ],
    ).send()
    if res and res.get('value') == "YES":
        await cl.Message(content="YES!").send()