In [1]:
from dotenv import load_dotenv
load_dotenv()


from src.services.vector_storage_service import VectorStorageManager
from src.services.chunking_process import SemanticChunkerWithNLP
from src.services.loading_documents import DocumentLoader
from src.services.QueryTransformation import QueryTransformation



query_transformer = QueryTransformation()
document_loader = DocumentLoader()
semantic_chunker_nlp = SemanticChunkerWithNLP()
vector_store_manager = VectorStorageManager()

In [2]:
loaded_documents = document_loader.load_from_local("./rag-data/")

In [None]:
# chunked_documents = semantic_chunker_nlp.chunk_and_enrich(loaded_documents)

In [5]:

# vector_store = vector_store_manager.store_in_faiss(chunked_documents, "faiss_index")
vector_store = vector_store_manager.exist_in_faiss()

In [11]:
user_query = "Indian Agriculture and its irrigation system, also i need to know about indian independence movement"
processed_query = query_transformer.process_query(user_query)

In [12]:
processed_query

{'Q1': 'What are the characteristics of Indian agriculture?',
 'Q2': 'How does the irrigation system in India work?',
 'Q3': 'What was the Indian independence movement?'}

In [13]:
final_query_context = []

for each_query_key, each_query_value in processed_query.items():
    retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10})
    top_k_docs = retriever.get_relevant_documents(each_query_value)
    final_query_context.append({"question": each_query_value, "context": "["+"\n\n".join([doc.page_content for doc in top_k_docs[:5]]) + "]"})
    

  top_k_docs = retriever.get_relevant_documents(each_query_value)


In [14]:
final_query_context_string = ""


for index, each_query_context in enumerate(final_query_context):
    final_query_context_string = final_query_context_string + f"\nQuestion {index+1}: " + each_query_context["question"] + f"\n\nContext {index+1}: \n" + each_query_context["context"]

final_query_context_string

'\nQuestion 1: What are the characteristics of Indian agriculture?\n\nContext 1: \n[Tamil Nadu achieved highest yields in rice and sugarcane, Haryana in wheat and coarse grains, Karnataka in cotton, Bihar in pulses, while other states do well in horticulture, aquaculture, flower and fruit plantations. These differences in agricultural productivity are a function of local infrastructure, soil quality, micro-climates, local resources, farmer knowledge and innovations. The Indian food distribution system is highly inefficient. Movement of agricultural produce is heavily regulated, with inter-state and even inter-district restrictions on marketing and movement of agricultural goods. One study suggests Indian agricultural policy should best focus on improving rural infrastructure primarily in the form of irrigation and flood control infrastructure, knowledge transfer of better yielding and more disease resistant seeds. Additionally, cold storage, hygienic food packaging and efficient modern

In [40]:
import os
from anthropic import AnthropicBedrock


def talk_to_anthropic(context):

    system_prompt = f"""
You are a helpful and concise AI assistant. Your task is to generate informative and structured answers based solely on the paired context for each user question.

Instructions:
- Each question is followed by its corresponding context.
- Answer each question independently.
- Do NOT include or repeat the question in your response.
- Begin each answer with a short, descriptive title (do NOT prefix it with "Title:", "Question [No]:").
- On the line below the title, provide the most complete and accurate answer using only the provided context.
- You may synthesize or paraphrase the context to form a complete answer.
- Use logical inference if necessary, but do NOT introduce facts not supported or implied by the context.
- Structure the answer using bullet points or numbered lists if helpful.
- If the context lacks enough information to generate a meaningful response, write:
  Information not available for the given title: The provided context does not contain sufficient details to answer this question.

Now process the following question-context pairs:

{context}
"""

    # print(system_prompt)
    aws_access_key = os.getenv("AWS_ACCESS_KEY")
    aws_secret_key = os.getenv("AWS_SECRET_KEY")
    aws_region = os.getenv("AWS_REGION")
    aws_model = os.getenv("AWS_MODEL")

    client = AnthropicBedrock(
        aws_access_key=aws_access_key,
        aws_secret_key=aws_secret_key,
        aws_region=aws_region
    )

    message = client.messages.create(
        model=aws_model,
        max_tokens=5000,
        temperature=0.4,
        # messages=chat_history + [{"role": "user", "content": system_prompt}],
        messages=[{"role": "user", "content": system_prompt}],
    )

    return message.content[0].text, system_prompt


In [16]:


def validate_response_with_claude(context, generated_response):
    validation_prompt = f"""
You are a critical and precise evaluator.

Your task is to validate whether the following generated responses are:
1. Factually supported by the information in the given question-context pairs.
2. Free from hallucinated or made-up information.
3. Relevant to each question and its context.

Instructions:
- Carefully compare the response to the matching context.
- If all answers are factually grounded and fully supported by the context, respond with: "Valid".
- If any answer contains unsupported, irrelevant, or made-up content, respond with: "Invalid".
- Do NOT explain or include anything other than the word "Valid" or "Invalid".

Question-Context Pairs:
{context}

Generated Response:
{generated_response}
"""

    # Load AWS credentials and config from environment
    aws_access_key = os.getenv("AWS_ACCESS_KEY")
    aws_secret_key = os.getenv("AWS_SECRET_KEY")
    aws_region = os.getenv("AWS_REGION")
    aws_model = os.getenv("AWS_MODEL")  # e.g. "anthropic.claude-3-sonnet-20240229"

    # Initialize Claude (Anthropic) client
    client = AnthropicBedrock(
        aws_access_key=aws_access_key,
        aws_secret_key=aws_secret_key,
        aws_region=aws_region
    )

    message = client.messages.create(
        model=aws_model,
        max_tokens=200,
        temperature=0.0,
        messages=[{"role": "user", "content": validation_prompt.strip()}]
    )

    return message.content[0].text.strip()


In [41]:

response, system_prompt = talk_to_anthropic(final_query_context_string)


In [42]:
validation_result = validate_response_with_claude(final_query_context_string, response)

In [44]:
validation_result

'Valid'