In [7]:
import os
from dotenv import load_dotenv
import vertexai
from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate
import json
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import asyncio
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from pinecone import ServerlessSpec
import time
#import logging


### Define file paths
# Paths are anchored at the current working directory; the __file__-based
# lookup is kept below (commented out) for reference.
#script_dir = os.path.dirname(os.path.abspath(__file__))
script_dir = os.getcwd()
config_path = os.path.join(script_dir, "../", "config.json")
# Decode the JSON config as UTF-8 explicitly instead of relying on the
# platform's default text encoding, which differs between operating systems.
with open(config_path, "r", encoding="utf-8") as file:
    config = json.load(file)

# Location of the .env file that holds the cloud credentials and API keys.
dotenv_path = os.path.join(script_dir, "../.env")
#pdf_path = os.path.join(script_dir, "data", config["pdf_path"])
#output_text_path = os.path.join(script_dir, "data", config["output_text_path"])
#saved_prompts = os.path.join(script_dir, "../", config["saved_prompts"])
#log_path = os.path.join(script_dir, "../", config["logs"])


### Load environment variables
# Populate os.environ from the .env file, then snapshot the settings below.
# Each value is None when the corresponding variable is not set.
load_dotenv(dotenv_path)

# Pinecone (vector database)
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Google Cloud / Vertex AI
GOOGLE_CLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT")
GOOGLE_CLOUD_LOCATION = os.getenv("GOOGLE_CLOUD_LOCATION")
# NOTE(review): the two values below are not referenced in the visible cells;
# presumably the Google SDKs read them straight from the environment - confirm.
GOOGLE_APPLICATION_CREDENTIALS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
GOOGLE_GENAI_USE_VERTEXAI = os.getenv("GOOGLE_GENAI_USE_VERTEXAI")


# # Configure logging
# logging.basicConfig(
#     filename=log_path,   
#     level=logging.INFO,    
#     format="%(asctime)s - %(levelname)s - %(message)s"
# )


### Configure the Language Model
# Initialise the Vertex AI SDK with the project and location read from the
# environment, then create the Gemini chat model through LangChain's
# init_chat_model factory.
vertexai.init(
    project=GOOGLE_CLOUD_PROJECT,
    location=GOOGLE_CLOUD_LOCATION,
)
gemini = init_chat_model(
    "gemini-2.0-flash-001",
    model_provider="google_vertexai",
)

In [13]:
from langchain_google_vertexai import VertexAIEmbeddings

# Vertex AI embedding model; it is handed to the Pinecone vector store below
# as its embedding function.
embeddings = VertexAIEmbeddings(model="text-embedding-005")

In [14]:
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone

# Open the Pinecone index by name and wrap it as a LangChain vector store
# scoped to a single namespace. The index is only opened here, not created.
index_name = "data-vectors"
namespace = "Acts"
# Reuse the key captured when the environment variables were loaded instead of
# reading the environment a second time, so the setting has a single source.
pinecone = Pinecone(api_key=PINECONE_API_KEY)
index = pinecone.Index(index_name)

vector_store = PineconeVectorStore(embedding=embeddings, index=index, namespace=namespace)

In [17]:
from langchain import hub
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Define prompt for question-answering.
# Pulled from the LangChain hub; generate() fills it with the "question" and
# "context" keys.
prompt = hub.pull("rlm/rag-prompt")


# Define state for application
class State(TypedDict):
    """Shared state passed between the steps of the question-answering graph."""
    # The user's question; read by both retrieve() and generate().
    question: str
    # Documents returned by the vector-store search in retrieve().
    context: List[Document]
    # Content of the model's reply, written by generate().
    answer: str


# Define application steps
def retrieve(state: State):
    """Look up documents similar to the question held in ``state``.

    Runs ``similarity_search`` on the module-level ``vector_store`` with
    ``state["question"]`` as the query.

    Returns:
        A dict with the single key ``"context"`` mapped to the search results.
    """
    matches = vector_store.similarity_search(state["question"])
    return {"context": matches}


def generate(state: State):
    """Answer the question in ``state`` using the retrieved context.

    The ``page_content`` of every document in ``state["context"]`` is joined
    with blank lines, inserted into the module-level ``prompt`` together with
    the question, and the result is sent to the ``gemini`` chat model.

    Returns:
        A dict with the single key ``"answer"`` mapped to the model
        response's ``content``.
    """
    passages = [doc.page_content for doc in state["context"]]
    filled_prompt = prompt.invoke(
        {"question": state["question"], "context": "\n\n".join(passages)}
    )
    reply = gemini.invoke(filled_prompt)
    return {"answer": reply.content}


# Assemble the pipeline: register retrieve and generate as a sequence over the
# State schema, connect START to "retrieve", then compile the graph.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [40]:
# Run the compiled graph on a sample question and print only the answer text.
response = graph.invoke(
    {"question": "How many times person can be president maximum?"}
)
print(response["answer"])

I'm sorry, but the provided context does not specify the maximum number of times a person can be president. The text focuses on the election process, qualifications, and potential removal of a president. Therefore, I cannot answer your question.



In [10]:
# Display the pulled prompt template to inspect its wording and input variables.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])