In [1]:
import os
from dotenv import load_dotenv
from pinecone import Pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.chains import LLMChain
from langchain_google_genai import ChatGoogleGenerativeAI

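# Notes on the LangChain imports above:
# - langchain.memory / langchain.prompts: chat history (memory) and the system + user prompt templates
# - LLMChain: ties the model, prompt, and memory together
# - ChatGoogleGenerativeAI: the Gemini chat model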

In [2]:
# Populate the process environment from the local .env file
load_dotenv()

# Read the two service credentials this notebook needs
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Report only whether each key is present -- never print the key itself
print("PINECONE_API_KEY Loaded:", bool(PINECONE_API_KEY))
print("GOOGLE_API_KEY Loaded:", bool(GOOGLE_API_KEY))

PINECONE_API_KEY Loaded: True
GOOGLE_API_KEY Loaded: True


In [3]:
# Create the Pinecone client with the key loaded from .env
pc = Pinecone(api_key=PINECONE_API_KEY)

# Get a handle to the already-existing "vchat" index
pinecone_index = pc.Index("vchat")

# Google Gemini embedding model, used to embed user questions before querying the index.
# Uses the GOOGLE_API_KEY loaded in the configuration cell (same pattern as the
# Pinecone client above) instead of re-reading the environment.
# NOTE(review): presumably the same model that embedded the documents stored
# in the index -- confirm, otherwise similarity scores are meaningless.
embed_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

print("Connected to Pinecone index and initialized embedding model.")

Connected to Pinecone index and initialized embedding model.


In [4]:
# System prompt template for the Gemini chat model.
# This is a plain Python format string with a single placeholder, {doc_content},
# which generate_response() fills with the text retrieved from Pinecone.
system_prompt_template = """
you are vchat, an AI assistant for a start up product called villam hub.
Answer questions very very briefly and accurately. Use the following information to answer the user's question:

{doc_content}

Provide very brief accurate and helpful health response based on the provided information and your expertise.
"""
# NOTE: because the template is passed through str.format(), any other literal
# curly brace added to the text above must be doubled ({{ or }}).

# Retrieve the top matching text chunks from Pinecone for a question
def retrieve_relevant_chunks(question, top_k=2):
    """Return the best-matching Pinecone text chunks for ``question`` as a bulleted string.

    The question is embedded with the module-level ``embed_model`` and the
    resulting vector is used to query the module-level ``pinecone_index``.

    Args:
        question: The user's question; passed to ``embed_model.embed_query``.
        top_k: Maximum number of matches to request from Pinecone (default 2).

    Returns:
        One ``- <chunk>`` line per non-empty chunk, joined with newlines, with
        every curly brace doubled so the text can later be placed inside a
        prompt template without being mistaken for a placeholder. If no match
        carries any text, the literal string ``"No relevant information found."``.
    """
    # Embed the user question and coerce every component to a plain Python
    # float for Pinecone compatibility.
    query_vector = [float(x) for x in embed_model.embed_query(question)]

    # Ask Pinecone for the `top_k` most similar vectors; only metadata is needed.
    search_results = pinecone_index.query(
        vector=query_vector,
        top_k=top_k,
        include_values=False,
        include_metadata=True,
    )

    # Collect the 'text' metadata field of each match. Matches without
    # metadata, or whose text is missing/blank, are skipped so they do not
    # produce empty "- " bullets in the prompt.
    top_chunks = []
    for match in search_results.get("matches", []) or []:
        metadata = match.get("metadata") or {}
        text = (metadata.get("text") or "").strip()
        if text:
            top_chunks.append(text)

    # Fallback when nothing usable came back
    if not top_chunks:
        return "No relevant information found."

    bulleted = "\n".join(f"- {chunk}" for chunk in top_chunks)

    # Escape curly braces so downstream prompt templating treats them as literals
    return bulleted.replace("{", "{{").replace("}", "}}")

In [None]:
# Main function to generate a response
def generate_response(user_question, history=None):
    """Generate a response using Pinecone + Gemini 2.0 with optional memory.

    Args:
        user_question: The user's current question.
        history: Optional list of earlier turns, each a dict with a ``"role"``
            key (``"user"`` or ``"assistant"``) and a ``"content"`` key.
            Entries with any other role are ignored. ``None`` (the default)
            means no prior conversation.

    Returns:
        The model's answer text, or ``"Sorry, I couldn't find an answer."`` if
        the chain result has no ``"text"`` entry.
    """
    # Retrieve the most relevant chunks from Pinecone
    context = retrieve_relevant_chunks(user_question)
    print('Retrieved context from pinecone: \n', context)

    # Fill the retrieved content into the system prompt.
    # `context` arrives with its curly braces doubled; str.format() does not
    # re-interpret substituted values, so the doubled braces survive here and
    # are collapsed back to single braces when the string is parsed as a
    # LangChain template below.
    system_prompt = system_prompt_template.format(doc_content=context)

    # Convert the passed chat history to LangChain format
    chat_history = ChatMessageHistory()
    for msg in history or []:
        role = msg["role"]
        if role == "user":
            chat_history.add_user_message(msg["content"])
        elif role == "assistant":
            chat_history.add_ai_message(msg["content"])

    # Initialize memory for the chain, seeded with the prior turns
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        chat_memory=chat_history,
        return_messages=True
    )

    # Define the full chat prompt
    prompt = ChatPromptTemplate(
        messages=[
            SystemMessagePromptTemplate.from_template(system_prompt),   # gives V-Chat its role + retrieved info
            MessagesPlaceholder(variable_name="chat_history"),          # allows past chat to be included
            HumanMessagePromptTemplate.from_template("{question}")      # inserts user's current question
        ]
    )

    # Load the Gemini 2.0 Flash LLM
    chat_model = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        temperature=0,
        google_api_key=os.getenv("GOOGLE_API_KEY")
    )

    # Combine LLM, prompt, and memory into a conversation chain
    conversation = LLMChain(
        llm=chat_model,
        prompt=prompt,
        memory=memory,
        verbose=True  # helps with debugging/logging
    )

    # Ask the question and get the final answer (invoke() replaces the
    # deprecated direct call on the chain object)
    result = conversation.invoke({"question": user_question})

    # Debug output: this is the pre-template string, so braces from the
    # retrieved context still appear doubled here.
    print("Prompt passed to Gemini:\n", system_prompt)
    return result.get("text", "Sorry, I couldn't find an answer.")
    
