In [1]:
import os
import sys
import json
from pathlib import Path
from dotenv import load_dotenv

# Load environment
load_dotenv()
# Resolve the project root: when the notebook is run from the `notebooks/`
# folder the root is its parent; otherwise the working directory is the root.
current_dir = Path.cwd()
# Compare the directory *name* rather than searching the whole path string, so
# an unrelated path component that merely contains "notebooks" cannot shift
# the root to the wrong directory.
project_root = current_dir.parent if current_dir.name == 'notebooks' else current_dir
# Guarded so that re-running this cell does not stack duplicate sys.path entries.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# LLM Imports
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Config Loading
# JSON is UTF-8 by specification; pass the encoding explicitly so the read
# does not depend on the platform's locale default.
with open(project_root / "phase4_config.json", 'r', encoding="utf-8") as f:
    phase4_config = json.load(f)

print(f" Loaded Config. Using Model: {phase4_config['llm_model']}")

 Loaded Config. Using Model: llama-3.3-70b-versatile


In [2]:
# Initialize Groq LLM
# Settings are collected first so they can be read at a glance, then handed to
# the client in a single call.
groq_settings = {
    "model_name": phase4_config['llm_model'],
    "temperature": 0.3,  # Low temperature for factual accuracy
    "api_key": os.environ.get("GROQ_API_KEY"),
}
llm = ChatGroq(**groq_settings)

print("✅ Groq LLM Initialized")

✅ Groq LLM Initialized


In [4]:

# Define Prompt Template
#
# One template carries the assistant persona, the answer guidelines and three
# placeholders that are filled in when the chain is invoked:
#   {context}      - the formatted retrieved documents
#   {chat_history} - prior conversation text
#   {question}     - the user's question
# The guidelines restrict answers to the supplied context and ask the model to
# list the source documents it used.

system_template = """
You are an expert Agricultural Assistant helping Indian farmers. 
Your goal is to answer their questions accurately using ONLY the provided context.

### GUIDELINES:
1. **Tone**: Be empathetic, professional, and simple. Use clear English.
2. **Structure**: 
   - Start with a direct answer.
   - Use bullet points for steps or lists.
   - If discussing a chemical, explicitly mention safety warnings if present in context.
3. **Accuracy**: 
   - Answer ONLY based on the "Context" below. 
   - If the answer is not in the context, say: "I am sorry, but I could not find that information in my reference documents."
   - DO NOT make up information.
4. **Citations**: 
   - At the end, list the specific Source Documents used (e.g., "Source: CitrusPlantPestsAndDiseases.pdf").

### CONTEXT:
{context}

### CHAT HISTORY:
{chat_history}

### USER QUESTION:
{question}
"""

# Compose the generation pipeline: prompt -> chat model -> plain string.
# NOTE(review): `from_template` builds a single human-role message, so despite
# its name `system_template` is not sent as a system message - confirm this is
# the intended behaviour.
prompt = ChatPromptTemplate.from_template(system_template)
chain = prompt.pipe(llm, StrOutputParser())

print("Prompt Template Ready")

Prompt Template Ready


In [6]:
# Function to format context for LLM

def format_context(docs):
    """
    Render retrieved documents as a single prompt-ready text block.

    Args:
        docs: Iterable of dicts, each optionally holding a 'content' string
            and a 'metadata' dict with a 'source' entry. ``None`` or an empty
            list means "no documents".

    Returns:
        A ``(formatted_text, sources)`` tuple. ``formatted_text`` contains one
        "[Document i - Source: ...]" section per document (an empty string
        when there are no documents). ``sources`` lists every distinct source
        once, in order of first appearance.
    """
    sections = []
    # A dict keeps insertion order, so the returned source list is
    # deterministic (a set would yield an arbitrary order between runs).
    seen_sources = {}

    for i, doc in enumerate(docs or [], 1):
        # The `or` fallbacks also cover keys that are present but set to None.
        content = (doc.get('content') or '').replace('\n', ' ')
        source = (doc.get('metadata') or {}).get('source') or 'Unknown File'

        sections.append(f"[Document {i} - Source: {source}]\n{content}\n\n")
        seen_sources.setdefault(source, None)

    return "".join(sections), list(seen_sources)

print("`format_context` function ready")

`format_context` function ready


In [7]:
# Function to generate answer using LLM

def generate_answer(question: str, retrieved_docs: list, chat_history: str = "") -> str:
    """
    Answer `question` with the LLM chain, grounded in `retrieved_docs`.

    Args:
        question: The user's question, inserted into the prompt as given.
        retrieved_docs: Retrieved document dicts, passed to `format_context`.
            May be empty or ``None``.
        chat_history: Prior conversation text for the prompt; defaults to "".

    Returns:
        The chain's response, or a fixed apology message when there is no
        context to ground the answer in.
    """
    no_docs_message = "I'm sorry, I couldn't find any relevant documents to answer your question."

    # Bail out before formatting: with nothing retrieved (empty list or None)
    # there is no context to build and no reason to spend an LLM call.
    if not retrieved_docs:
        return no_docs_message

    # 1. Prepare Context
    # The source list is not needed here; the prompt asks the model to cite
    # the sources itself.
    context_text, _sources = format_context(retrieved_docs)

    # Defensive: also covers a formatter that produced no text at all.
    if not context_text:
        return no_docs_message

    # 2. Invoke LLM
    return chain.invoke({
        "context": context_text,
        "chat_history": chat_history,
        "question": question
    })

print("`generate_answer` function ready")

`generate_answer` function ready


### Testing with Mock Data
Before we connect the full retrieval pipeline, let's test if the LLM generates good answers using **fake** retrieved data. This isolates the "Generation" logic from the "Retrieval" logic.

In [8]:
# 1. Mock Context (Simulating what Phase 5 would return)
# Both disease passages come from the same PDF, so they are built from one
# shared source name.
citrus_pdf = "CitrusPlantPestsAndDiseases.pdf"
mock_docs_disease = [
    {"content": passage, "metadata": {"source": citrus_pdf}}
    for passage in (
        "To control Citrus Canker, prune the infected twigs. Spray Copper Oxychloride (0.3%) at 15-day intervals. Ensure proper orchard hygiene.",
        "Citrus Canker is a bacterial disease caused by Xanthomonas. It causes corky lesions on fruit and leaves.",
    )
]

mock_docs_scheme = [
    {
        "content": "The PMKSY scheme provides up to 55% subsidy for small farmers installing Drip Irrigation. The application must be submitted via the state portal.",
        "metadata": {"source": "GovernmentSchemes.pdf"},
    }
]

# 2. Run both queries through the generation step and show the answers.
divider = "-" * 50

print(" TEST 1: Disease Query", divider, sep="\n")
ans1 = generate_answer(question="How do I treat Canker?", retrieved_docs=mock_docs_disease)
print(ans1)

print("\n\n TEST 2: Scheme Query", divider, sep="\n")
ans2 = generate_answer(question="Is there subsidy for drip irrigation?", retrieved_docs=mock_docs_scheme)
print(ans2)

 TEST 1: Disease Query
--------------------------------------------------
To treat Citrus Canker, you can follow these steps:
* Prune the infected twigs
* Spray Copper Oxychloride (0.3%) at 15-day intervals
* Ensure proper orchard hygiene

Please note that when handling Copper Oxychloride, it's essential to follow safety guidelines to avoid any potential harm.

Source: CitrusPlantPestsAndDiseases.pdf


 TEST 2: Scheme Query
--------------------------------------------------
Yes, there is a subsidy for drip irrigation. The PMKSY scheme provides up to 55% subsidy for small farmers installing Drip Irrigation. 

To avail of this subsidy, you will need to:
* Submit your application via the state portal

Source: GovernmentSchemes.pdf


In [9]:
# Persist the generation settings of this phase next to the other phase configs.
phase6_config = {
    "generation_model": phase4_config['llm_model'],
    # NOTE(review): repeats the temperature literal passed to ChatGroq above;
    # the two values have to be kept in sync by hand.
    "temperature": 0.3,
    "persona": "Expert Agricultural Assistant",
    "citation_style": "Explicit Source Listing",
    # NOTE(review): fixed literal, not the time the file is actually written.
    "created_at": "2026-01-05T07:00:00.000000",
}

phase6_path = project_root / "phase6_config.json"
with phase6_path.open('w') as f:
    f.write(json.dumps(phase6_config, indent=2))

print(" Phase 6 Configuration saved")

 Phase 6 Configuration saved
