In [7]:
import os
from dotenv import load_dotenv

# Read the .env file so the API keys become visible through os.environ.
load_dotenv()

# Look up each secret; a variable that is not set yields None.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Report only whether each key was found -- never echo the key itself.
print(f"PINECONE_API_KEY Loaded: {bool(PINECONE_API_KEY)}")
print(f"GOOGLE_API_KEY Loaded: {bool(GOOGLE_API_KEY)}")

PINECONE_API_KEY Loaded: True
GOOGLE_API_KEY Loaded: True


In [9]:
from pinecone import Pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Name of the existing Pinecone index queried by the retrieval code below.
PINECONE_INDEX_NAME = "villambot"

# Initialize the Pinecone client and get a handle on the existing index.
pc = Pinecone(api_key=PINECONE_API_KEY)
pinecone_index = pc.Index(PINECONE_INDEX_NAME)

# Initialize the Google Gemini embedding model. The API key is passed
# explicitly (the same way the chat model receives it) instead of relying on
# the library picking it up from the environment on its own.
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

print("Connected to Pinecone index and initialized embedding model.")

Connected to Pinecone index and initialized embedding model.


In [11]:
# System prompt for the Gemini 2.0 chat model behind VillamBot.
# The template contains exactly one placeholder, {doc_content}, which is
# filled in with str.format() before the prompt is handed to the model.
system_prompt_template = """
You are VillamBot, a warm, grounded, and reliable assistant for Villam Hub.
Your role is to support users on their journey to sustainable living by helping them understand our platform services, farming kits, and how to grow food at home.

Use the following information to answer the user's question:

{doc_content}

Respond clearly, accurately, and concisely based on the provided information and your expertise in sustainable agriculture, eco-friendly practices, and urban farming. 

"""
# {doc_content} is the slot where the text retrieved from our data source is
# inserted. Because the template goes through str.format(), any other literal
# curly brace added to it would have to be doubled ({{ or }}).

# Function to retrieve top matching chunks from Pinecone
def retrieve_relevant_chunks(question, top_k=3):
    """Return the text of the Pinecone chunks most similar to ``question``.

    The question is embedded with the module-level ``embedding_model`` and the
    resulting vector is used to query the module-level ``pinecone_index``.

    Args:
        question: The user's question as a plain string.
        top_k: Maximum number of matches to request from Pinecone
            (defaults to 3).

    Returns:
        The non-empty ``"text"`` metadata values of the matches, joined with
        newlines and with every ``{`` / ``}`` doubled so the result can be
        embedded in a format-style prompt template. If no match carries any
        text, the string ``"No relevant information found."`` is returned.
    """
    # Embed the user question. The notebook defines the embedder as
    # `embedding_model`; values are coerced to built-in floats so the vector
    # is made of plain Python numbers when it is sent to Pinecone.
    query_vector = [float(x) for x in embedding_model.embed_query(question)]

    # Query Pinecone for the most similar text chunks.
    search_results = pinecone_index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )

    # Collect the 'text' field from each match's metadata.
    top_chunks = []
    for match in search_results.get("matches", []) or []:
        # A match stored without metadata would otherwise crash on `.get`.
        metadata = match["metadata"] or {}
        content = metadata.get("text", "")
        # Skip blank entries so an all-empty result falls through to the
        # fallback message instead of producing an empty "context".
        if content:
            top_chunks.append(content)

    if not top_chunks:
        return "No relevant information found."

    # Escape curly braces in content to prevent format() issues in the prompt.
    return "\n".join(top_chunks).replace("{", "{{").replace("}", "}}")


In [None]:
def generate_response(question):
    """Generate a response using Pinecone retrieval and Gemini 2.0 Flash.

    Steps: embed the question, fetch the top 3 matching chunks from Pinecone,
    insert their text into ``system_prompt_template``, rebuild the chat
    history from ``st.session_state.chat_history``, and run an ``LLMChain``
    over the resulting prompt.

    Args:
        question: The user's question as a plain string.

    Returns:
        The value stored under ``"text"`` in the chain's result, or ``""``
        when that key is absent.

    NOTE(review): besides the notebook globals (``embedding_model``,
    ``pinecone_index``, ``system_prompt_template``, ``GOOGLE_API_KEY``) this
    function expects ``st`` and the LangChain classes it uses to already be
    imported; no import for them is visible in the preceding cells -- confirm
    they are provided elsewhere.
    """
    # Imported here so the function does not depend on an import made in
    # some other cell.
    import asyncio

    # Create a new asyncio event loop (needed when calling async from sync context).
    # NOTE(review): a fresh loop is installed on every call and never closed;
    # confirm whether the Google clients keep using it before adding a close().
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    # 1. Embed the user's question. The notebook defines the embedder as
    #    `embedding_model`; coerce every component to a standard Python float.
    query_embed = [float(val) for val in embedding_model.embed_query(question)]

    # 2. Query Pinecone for relevant document chunks
    results = pinecone_index.query(
        vector=query_embed,
        top_k=3,  # Get top 3 most relevant chunks
        include_values=False,
        include_metadata=True
    )

    # 3. Extract document contents from metadata (stored under "text")
    doc_contents = []
    print("\n" + "="*50)
    print(f"RETRIEVED DOCUMENTS FOR: '{question}'")
    for i, match in enumerate(results.get('matches', []) or []):
        # A match stored without metadata would otherwise crash on `.get`.
        metadata = match['metadata'] or {}
        text = metadata.get('text', '')  # Must match the metadata key used when upserting
        print(f"\nDOCUMENT {i+1}:\n{text}\n")
        # Only non-empty chunks are kept, so an all-blank result uses the fallback.
        if text:
            doc_contents.append(text)
    print("="*50 + "\n")

    # 4. Join all retrieved content into one string (or use the fallback).
    #    Braces are doubled because the formatted prompt is parsed once more as
    #    a template by SystemMessagePromptTemplate.from_template below.
    if doc_contents:
        doc_content = "\n".join(doc_contents).replace('{', '{{').replace('}', '}}')
    else:
        doc_content = "No additional information found from my knowledge base."

    # 5. Format the system prompt with the document content
    formatted_prompt = system_prompt_template.format(doc_content=doc_content)

    # 6. Rebuild chat history from Streamlit's session state
    chat_history = ChatMessageHistory()
    for msg in st.session_state.chat_history:
        if msg["role"] == "user":
            chat_history.add_user_message(msg["content"])
        elif msg["role"] == "assistant":
            chat_history.add_ai_message(msg["content"])

    # 7. Initialize LangChain memory with that chat history
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        chat_memory=chat_history,
        return_messages=True
    )

    # 8. Create the full prompt template
    prompt = ChatPromptTemplate(
        messages=[
            SystemMessagePromptTemplate.from_template(formatted_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            HumanMessagePromptTemplate.from_template("{question}")
        ]
    )

    # 9. Initialize Gemini 2.0 Flash model
    chat = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        temperature=0.1,
        google_api_key=GOOGLE_API_KEY
    )

    # 10. Create the LangChain chain (LLM + prompt + memory)
    conversation = LLMChain(
        llm=chat,
        prompt=prompt,
        memory=memory,
        verbose=True
    )

    # 11. Run the chain and return the assistant's response
    res = conversation({"question": question})
    return res.get('text', '')
