In [47]:
import os
from dotenv import load_dotenv

# Populate the process environment from a local .env file, if one exists
load_dotenv()

# Read both API keys back out of the environment in one pass
PINECONE_API_KEY, GOOGLE_API_KEY = (
    os.getenv(var_name) for var_name in ("PINECONE_API_KEY", "GOOGLE_API_KEY")
)

# Report only whether each key is present -- never echo the secret itself
print(
    f"PINECONE_API_KEY Loaded: {bool(PINECONE_API_KEY)}",
    f"GOOGLE_API_KEY Loaded: {bool(GOOGLE_API_KEY)}",
    sep="\n",
)

PINECONE_API_KEY Loaded: True
GOOGLE_API_KEY Loaded: True


In [48]:
from pinecone import Pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Initialize the Pinecone client with the API key read from the environment
pc = Pinecone(api_key=PINECONE_API_KEY)

# Get a handle to the existing "villambot" index; retrieve_relevant_chunks
# queries it through this module-level name
pinecone_index = pc.Index("villambot")

# Initialize the Google Gemini embedding model used to embed user questions.
# No API key is passed explicitly here -- presumably the library picks up
# GOOGLE_API_KEY from the environment; TODO confirm
embed_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# NOTE(review): this message is printed unconditionally; nothing above
# explicitly verifies that the index is reachable
print("Connected to Pinecone index and initialized embedding model.")

Connected to Pinecone index and initialized embedding model.


In [115]:
# System prompt template for VillamBot, sent to the Gemini 2.0 chat model.
# generate_response fills it in with str.format(), so any literal curly brace
# added to this text would have to be doubled ({{ or }}).
system_prompt_template = """
Your name is VillamBot. you are a friendly and reliable assistant for Villam Hub. You are to Answer questions very very briefly and accurately.

Use the following information to answer the user's question:

{doc_content}

Provide very brief accurate and helpful response based on the provided information and your expertise.

"""
# {doc_content} is the placeholder that generate_response replaces with the
# text returned by retrieve_relevant_chunks.

# Function to retrieve top matching chunks from Pinecone
def retrieve_relevant_chunks(question, top_k=3):
    """Fetch the stored text chunks most similar to ``question`` from Pinecone.

    The question is embedded with the module-level ``embed_model`` and the
    resulting vector is used to query the module-level ``pinecone_index``.
    The ``text`` metadata field of every match becomes one ``- `` bullet in
    the returned string.

    Args:
        question: The user's question as plain text.
        top_k: Maximum number of matches to request from Pinecone
            (defaults to 3).

    Returns:
        A newline-joined bullet list of the retrieved texts with every ``{``
        and ``}`` doubled, or ``"No relevant information found."`` when no
        match carries any text.
    """
    # Embed the question and coerce every component to a built-in float
    # (kept from the original "for pinecone compatibility" -- presumably the
    # embedding client may return non-native numeric types).
    query_vector = [float(x) for x in embed_model.embed_query(question)]

    # Query Pinecone for the most similar text chunks
    search_results = pinecone_index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )

    # DEBUG PRINT:
    print("\nRETRIEVED FROM PINECONE:")

    top_chunks = []
    # A single pass serves both the debug output and the extraction. `.get`
    # with fallbacks keeps a response without matches, or a match without
    # metadata, from raising.
    for i, match in enumerate(search_results.get("matches") or []):
        metadata = match.get("metadata") or {}
        print(f"Chunk {i+1}: {metadata.get('text', '[NO TEXT]')}")

        content = metadata.get("text", "")
        # Skip matches with no text so the context contains no bare "- "
        # bullets and the fallback below still triggers when nothing usable
        # was retrieved.
        if content:
            top_chunks.append(f"- {content}")

    # Return concatenated result or fallback if nothing found
    if not top_chunks:
        return "No relevant information found."

    # Double the curly braces: generate_response embeds this text in a string
    # that is later parsed as a prompt template, where a single brace would be
    # read as a template variable.
    return "\n".join(top_chunks).replace("{", "{{").replace("}", "}}")


In [50]:
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.chains import LLMChain
from langchain_google_genai import ChatGoogleGenerativeAI
import streamlit as st 

# The imports above provide, in order:
#   - chat history / memory classes, plus the system and user prompt templates
#   - LLMChain, which ties the model, prompt, and memory together
#   - the Gemini chat model wrapper

In [117]:
# Main function to generate response
def generate_response(user_question, history=None):
    """Generate a response using Pinecone + Gemini 2.0 with optional memory.

    Args:
        user_question: The user's current question as plain text.
        history: Optional iterable of earlier turns. Each entry is a dict with
            a ``"role"`` of ``"user"`` or ``"assistant"`` and a ``"content"``
            string. Entries with any other (or no) role are skipped. Defaults
            to no history.

    Returns:
        The ``"text"`` value of the chain result, or
        ``"Sorry, I couldn't find an answer."`` if the result has no such key.
    """
    # 1. Retrieve the most relevant chunks from Pinecone
    context = retrieve_relevant_chunks(user_question)

    # 2. Format the system prompt using the retrieved content.
    #    str.format() inserts `context` verbatim, so the doubled braces it
    #    carries survive until the template is parsed again in step 5.
    system_prompt = system_prompt_template.format(doc_content=context)

    # 3. Convert the passed chat history to LangChain format
    chat_history = ChatMessageHistory()
    add_message_for_role = {
        "user": chat_history.add_user_message,
        "assistant": chat_history.add_ai_message,
    }
    # `history or ()` replaces the former mutable default argument (`[]`).
    for msg in history or ():
        add_message = add_message_for_role.get(msg.get("role"))
        if add_message is not None:
            add_message(msg["content"])

    # 4. Initialize memory for the chain
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        chat_memory=chat_history,
        return_messages=True
    )
    # 5. Define the full chat prompt
    prompt = ChatPromptTemplate(
        messages=[
            SystemMessagePromptTemplate.from_template(system_prompt),   # gives VillamBot its role + retrieved info
            MessagesPlaceholder(variable_name="chat_history"),          # allows past chat to be included
            HumanMessagePromptTemplate.from_template("{question}")      # inserts user's current question
        ]
    )
    # 6. Load the Gemini 2.0 Flash model
    chat_model = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        temperature=0.1,
        google_api_key=os.getenv("GOOGLE_API_KEY")
    )
    # 7. Combine LLM, prompt, and memory into a conversation chain
    conversation = LLMChain(
        llm=chat_model,
        prompt=prompt,
        memory=memory,
        verbose=True  # helps with debugging/logging
    )

    # 8. Ask the question and get the final answer.
    #    `.invoke` is the supported replacement for calling the chain object
    #    directly, which LangChain has deprecated.
    result = conversation.invoke({"question": user_question})
    return result.get("text", "Sorry, I couldn't find an answer.")
