In [1]:
import os
from dotenv import load_dotenv

# Load variables from the local .env file into the process environment.
load_dotenv(".env")

# Read the Gemini API key and fail fast with a clear message when it is absent.
# (Previously a missing key surfaced as an opaque TypeError from assigning
# None into os.environ.)
gemini_api_key = os.getenv("GEMINI_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; add it to .env or export it before running."
    )

# Never echo the secret itself: cell outputs are saved inside the notebook
# file and travel with it. Confirm presence only.
print("GEMINI_API_KEY loaded:", bool(gemini_api_key))

[REDACTED: API key removed from saved output — revoke and rotate this key]


In [3]:
# Step 1: read the files under ./documents/ that match the "**/*.pdf" glob
# using LangChain's directory loader.
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader(
    './documents/',
    glob="**/*.pdf",
)
documents = loader.load()

In [4]:
# Step 2: cut the loaded documents into chunks of up to 1000 characters,
# with 200 characters of overlap between neighbouring chunks.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(documents)

In [5]:
# 3. Initialize embeddings
import hashlib
from collections import Counter

from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_function = GoogleGenerativeAIEmbeddings(
    google_api_key=gemini_api_key,
    model="models/text-embedding-004"
)

# 4. Build (or refresh) the persisted Chroma index
#
# The store is persisted to ./chroma_data, so without explicit ids every
# re-run of this cell appends the same chunks again and similarity search
# starts returning duplicates. Deriving each id from the chunk's source and
# text makes re-runs overwrite existing entries instead of adding new ones.
# An occurrence counter keeps ids unique when the same text appears more
# than once in a source.
# NOTE(review): chunks from documents that were later removed or edited stay
# in ./chroma_data until the directory is cleared — confirm that is acceptable.
_occurrences = Counter()
chunk_ids = []
for _chunk in splits:
    _fingerprint = hashlib.sha256(
        f"{_chunk.metadata.get('source', '')}\x00{_chunk.page_content}".encode("utf-8")
    ).hexdigest()
    chunk_ids.append(f"{_fingerprint}-{_occurrences[_fingerprint]}")
    _occurrences[_fingerprint] += 1

vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_function,
    ids=chunk_ids,
    persist_directory="./chroma_data"
)

In [6]:
# Step 5: smoke-test the index with a sample query and show what came back.
docs = vectorstore.similarity_search("bartleby", k=4)
hit_count = len(docs)
print(f"Number of docs retrieved: {hit_count}")
if hit_count > 0:
    top_hit = docs[0]
    # Only the first 200 characters, to keep the cell output short.
    print(f"First doc content: {top_hit.page_content[:200]}")

Number of docs retrieved: 4
First doc content: I was touched. I said something in condolence with him. I hinted that of course he did wisely in abstaining from writing for a while; and urged him to embrace that opportunity of taking wholesome exer


In [7]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Re-export the key read at the top of the notebook, then read it back from
# the environment for the embedding client.
os.environ["GEMINI_API_KEY"] = gemini_api_key
api_key_from_env = os.environ["GEMINI_API_KEY"]

# Embedding client; this wrapper takes the key as `google_api_key`
# rather than `api_key`.
embedding_function = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
    google_api_key=api_key_from_env,
)

# Alternative (disabled): reopen the store persisted on disk instead of
# reusing the `vectorstore` object built in the earlier cell:
#     vectorstore = Chroma(
#         persist_directory="./chroma_data", embedding_function=embedding_function
#     )

# Expose the existing vector store through the retriever interface.
retriever = vectorstore.as_retriever()

# Debug output: show what each component resolved to.
for label, component in (
    ("Embedding function initialized:", embedding_function),
    ("Chroma vectorstore initialized:", vectorstore),
    ("Retriever initialized:", retriever),
):
    print(label, component)

Embedding function initialized: client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x78118846d3a0> model='models/text-embedding-004' task_type=None google_api_key=SecretStr('**********') credentials=None client_options=None transport=None request_options=None
Chroma vectorstore initialized: <langchain_chroma.vectorstores.Chroma object at 0x7812d6372150>
Retriever initialized: tags=['Chroma', 'GoogleGenerativeAIEmbeddings'] vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7812d6372150> search_kwargs={}


In [8]:
from langchain_google_genai import GoogleGenerativeAI

# Gemini text model used to generate answers; the key comes from the environment.
llm = GoogleGenerativeAI(
    api_key=os.environ["GEMINI_API_KEY"],
    model="gemini-1.5-flash",
)

In [9]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough  

# Prompt asking the model to answer the question based on the retrieved context.
prompt_template = """
Answer the following question based on the provided context:

Context: {context}

Question: {question}

Answer:"""

# Create a PromptTemplate instance
prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# Chain that stuffs the retrieved documents into {context} and calls the LLM.
stuff_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Retrieval chain. Callers invoke it with {"input": "<question text>"}.
#
# The previous version wrote `{"input": RunnablePassthrough()} | {...}`.
# Both operands are plain dicts, so that `|` was a Python dict merge
# (Python >= 3.9 only; TypeError on 3.8), not an LCEL pipe. It worked only
# because the merged dict was then coerced into one parallel step. The single
# mapping below is that same step written explicitly, minus the unused
# "input" passthrough key that the prompt never read.
qa_chain = (
    {
        # Fetch the documents relevant to the question text.
        "context": lambda x: retriever.invoke(x["input"]),
        # Forward the raw question text to the prompt.
        "question": lambda x: x["input"],
    }
    | stuff_chain
)

In [11]:
# Ask one simple, direct question end-to-end and show the model's answer.
question = "who was bartleby?"
chain_input = {"input": question}
response = qa_chain.invoke(chain_input)
print(response)

Based on the provided text, the narrator admits that very little is known about Bartleby.  The narrator only knows what he witnessed personally, and a vague rumor (that Bartleby had been a subordinate clerk in the Dead Letter Office) heard after Bartleby's death, the truth of which is uncertain.  Beyond that,  Bartleby remains largely a mystery.



In [None]:
import time
import gradio as gr

def slow_echo(message, history):
    """Answer `message` via `qa_chain` and stream the reply one character at a time.

    Args:
        message: The user's question text.
        history: Prior chat turns; accepted but not used.

    Yields:
        "Response:" followed by a progressively longer prefix of the answer,
        with a 10 ms pause before each yield. Yields nothing for an empty answer.
    """
    answer = qa_chain.invoke({"input": message})
    for shown in range(1, len(answer) + 1):
        time.sleep(0.01)
        yield "Response:" + answer[:shown]

# Build the chat UI: a Markdown heading above a ChatInterface that streams
# answers from `slow_echo`.
with gr.Blocks() as demo:
    # Heading emoji repaired: the literal previously held the mis-decoded
    # byte sequence of the "books" emoji and rendered as garbage.
    gr.Markdown("""
    # 📚 Document Q&A Assistant
    Ask questions about your documents and get AI-powered answers.
    """)
    
    # NOTE(review): `theme` on a ChatInterface nested inside Blocks may be
    # ignored in favour of the outer Blocks' theme — confirm against the
    # installed Gradio version.
    chatbot = gr.ChatInterface(
        fn=slow_echo,
        chatbot=gr.Chatbot(
            elem_id="chatbot",
            bubble_full_width=True,
            # (user avatar, bot avatar); paths are relative to the working directory.
            avatar_images=( "icons/user_1.png" , "icons/bot_1.png"),
            height=500
        ),
        title="Document Q&A",
        description="Ask me anything about your documents",
        theme="soft",
        examples=[
            "What is the main theme of the document?",
            "Can you summarize the key points?",
            "Who are the main characters mentioned?",
        ]
    )

# Start the local web server for the UI.
demo.launch()



* Running on local URL:  http://127.0.0.1:7881

To create a public link, set `share=True` in `launch()`.


