In [1]:
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
import warnings
warnings.filterwarnings('ignore')

# Load variables from a local .env file (later cells read GROQ_API_KEY and
# MONGODB_URI through os.getenv).
load_dotenv()

# Resolve the project root. If the working directory is a directory named
# "notebooks", or sits below one, the root is that directory's parent;
# otherwise the working directory itself is the root. Matching on the path
# component name (not a substring of the whole path) avoids false hits such as
# "/home/my-notebooks-backup/project".
_cwd = Path.cwd()
project_root = next(
    (
        candidate.parent
        for candidate in (_cwd, *_cwd.parents)
        if candidate.name == "notebooks"
    ),
    _cwd,
)

# Make the project root importable. The membership check keeps the cell
# idempotent so re-running it does not stack duplicate sys.path entries.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print(" Environment loaded")

 Environment loaded


In [2]:
import json
from typing import Dict, List, Optional, Tuple
from datetime import datetime
from enum import Enum

# LangChain with Groq instead of Gemini
from langchain_groq import ChatGroq  
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field

print(" All imports successful")

 All imports successful


### 1. Load Previous Configurations


In [3]:
# Load Phase 2 & 3 configs
phase2_config_path = project_root / "phase2_config.json"
phase3_config_path = project_root / "phase3_config.json"

# Bind both names up front so they exist (as empty dicts) even when a config
# file is absent; otherwise any later reference would raise NameError.
phase2_config = {}
phase3_config = {}

if phase2_config_path.exists():
    with open(phase2_config_path, 'r', encoding='utf-8') as f:
        phase2_config = json.load(f)
    print(" Phase 2 config loaded")
    print(f"   Disease index: {phase2_config['disease_index']}")
    print(f"   Scheme index: {phase2_config['scheme_index']}")
else:
    # Surface the missing file instead of skipping it silently
    print(f" Phase 2 config not found: {phase2_config_path}")

if phase3_config_path.exists():
    with open(phase3_config_path, 'r', encoding='utf-8') as f:
        phase3_config = json.load(f)
    print(" Phase 3 config loaded")
    print(f"   Database: {phase3_config['database']}")
else:
    print(f" Phase 3 config not found: {phase3_config_path}")

 Phase 2 config loaded
   Disease index: agri-chatbot-disease
   Scheme index: agri-chatbot-scheme
 Phase 3 config loaded
   Database: agri_chatbot


### 2. Define Intent Types & Schema


In [4]:
class IntentType(str, Enum):
    """Closed set of intent labels a query can be classified into.

    The ``str`` mixin makes every member compare equal to its plain string
    value (e.g. ``IntentType.DISEASE == "disease"``).
    """
    DISEASE = "disease"  # citrus diseases, pests, symptoms, treatment, prevention
    SCHEME = "scheme"    # government schemes, subsidies, financial assistance
    HYBRID = "hybrid"    # combines a disease/pest issue with government support
    UNCLEAR = "unclear"  # too vague or ambiguous to classify

class IntentClassification(BaseModel):
    """Structured result of classifying one query.

    The fields match the JSON object that the system prompt in this notebook
    asks the LLM to return. ``intent``, ``confidence`` and ``reasoning`` have
    no default and are therefore required; the remaining fields fall back to
    the defaults declared below.
    """
    # Declared as a plain str (not IntentType), so this schema does not reject
    # labels outside the enum.
    intent: str = Field(description="Classified intent: disease, scheme, hybrid, or unclear")
    # The 0.0-1.0 range is stated only in the description; no bound is enforced here.
    confidence: float = Field(description="Confidence score (0.0 to 1.0)")
    reasoning: str = Field(description="Brief explanation for the classification")
    # Maps an entity category name to the list of strings found for it;
    # default_factory gives each instance its own empty dict.
    entities: Dict[str, List[str]] = Field(
        default_factory=dict,
        description="Extracted entities (diseases, schemes, locations, etc.)"
    )
    needs_clarification: bool = Field(
        default=False,
        description="Whether the query needs clarification"
    )
    clarification_question: Optional[str] = Field(
        default=None,
        description="Clarification question if needed"
    )

# Confirm the schema cell ran and list every label defined by IntentType.
print(" Intent schema defined")
print("\nIntent types:")
for intent in IntentType:
    print(f"   ‚Ä¢ {intent.value}")

 Intent schema defined

Intent types:
   ‚Ä¢ disease
   ‚Ä¢ scheme
   ‚Ä¢ hybrid
   ‚Ä¢ unclear


### 3. Initialize Groq LLM


In [5]:
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Single source of truth for the model settings, so the values printed below
# cannot drift from the ones actually passed to ChatGroq.
GROQ_MODEL = "llama-3.3-70b-versatile"  # Fast and capable model
GROQ_TEMPERATURE = 0

# Treat both a missing key and the untouched template placeholder as "not configured".
if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here":
    # NOTE: `llm` is left undefined on this path, so the chain-building cell
    # further down cannot run until a key is provided.
    print(" GROQ_API_KEY not found in .env")
    print("  Please add your API key to continue")
else:
    # Initialize Groq LLM
    llm = ChatGroq(
        model=GROQ_MODEL,
        temperature=GROQ_TEMPERATURE,
        api_key=GROQ_API_KEY
    )

    print(" Groq LLM initialized")
    print(f"   Model: {GROQ_MODEL}")
    print(f"   Temperature: {GROQ_TEMPERATURE}")

 Groq LLM initialized
   Model: llama-3.3-70b-versatile
   Temperature: 0


### 4. Create Intent Classification Prompt


In [6]:
# Two-message chat template (system + user). `{context}` and `{query}` are the
# template variables supplied when the chain is invoked. The doubled braces
# (`{{` / `}}`) around the JSON example escape literal braces so they are not
# parsed as template variables.
intent_classification_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert agricultural assistant specializing in citrus farming.
Your task is to classify farmer queries into one of these intents:

1. **disease**: Query about citrus diseases, pests, symptoms, treatment, prevention
   Examples: "yellow leaves", "citrus canker treatment", "whitefly control"

2. **scheme**: Query about government schemes, subsidies, financial assistance
   Examples: "drip irrigation subsidy", "PMKSY scheme", "loan for farmers"

3. **hybrid**: Query combining BOTH disease/pest issues AND government support
   Examples: "schemes for disease management", "subsidy for pest control equipment"

4. **unclear**: Query is too vague or ambiguous to classify
   Examples: "help me", "what should I do", "tell me about citrus"

**Classification Rules:**
- If query mentions BOTH a disease/pest AND financial help ‚Üí hybrid
- If query only about disease/pest management ‚Üí disease
- If query only about schemes/subsidies ‚Üí scheme
- If query is too general or vague ‚Üí unclear (provide clarification question)

**Context provided:** {context}

**IMPORTANT: You MUST respond with a valid JSON object with ALL these fields:**
{{
    "intent": "disease|scheme|hybrid|unclear",
    "confidence": 0.85,
    "reasoning": "Brief explanation for classification",
    "entities": {{"diseases": [], "schemes": [], "locations": []}},
    "needs_clarification": false,
    "clarification_question": null
}}

**DO NOT include any text before or after the JSON. ONLY return the JSON object.**"""),
    ("user", "Query: {query}")
])

# JSON output parser associated with the IntentClassification schema.
# classify_intent treats the chain's result as a plain dict (it calls
# .setdefault on it and unpacks it into IntentClassification), so schema
# validation happens there, not in this parser.
parser = JsonOutputParser(pydantic_object=IntentClassification)

# Create chain: prompt -> LLM -> JSON parser
intent_chain = intent_classification_prompt | llm | parser

print(" Intent classification chain created")

 Intent classification chain created


### 5. Build Intent Classifier Function


In [7]:
def _salvage_classification_fields(raw) -> dict:
    """Build schema-safe IntentClassification kwargs from a possibly malformed payload.

    Every field is type-checked individually; anything missing or of the wrong
    type is replaced by a conservative default, so the returned dict can always
    be unpacked into IntentClassification without a validation error.
    """
    data = raw if isinstance(raw, dict) else {}

    intent = data.get("intent")
    if not isinstance(intent, str) or not intent.strip():
        intent = "unclear"

    try:
        confidence = float(data.get("confidence", 0.5))
    except (TypeError, ValueError):
        confidence = 0.5

    reasoning = data.get("reasoning")
    if not isinstance(reasoning, str):
        reasoning = "Unable to classify with full confidence"

    entities = data.get("entities")
    entities_valid = isinstance(entities, dict) and all(
        isinstance(key, str)
        and isinstance(values, list)
        and all(isinstance(item, str) for item in values)
        for key, values in entities.items()
    )
    if not entities_valid:
        entities = {}

    needs_clarification = data.get("needs_clarification", True)
    if not isinstance(needs_clarification, bool):
        needs_clarification = True

    # A key that is present but None/empty must still fall back to the default
    # question; dict.get's default only covers the "key missing" case.
    question = data.get("clarification_question")
    if not isinstance(question, str) or not question.strip():
        question = "Could you provide more details about your query?"

    return {
        "intent": intent,
        "confidence": confidence,
        "reasoning": reasoning,
        "entities": entities,
        "needs_clarification": needs_clarification,
        "clarification_question": question,
    }


def classify_intent(
    query: str,
    conversation_context: Optional[str] = None
) -> IntentClassification:
    """
    Classify query intent with optional conversation context

    Any failure (LLM/network error, non-JSON-object output, schema validation
    error) is reported on stdout and converted into a best-effort fallback
    classification instead of propagating to the caller.

    Args:
        query: User's query
        conversation_context: Previous conversation history (optional)

    Returns:
        IntentClassification object
    """

    # Build context string
    context_str = "No previous conversation."
    if conversation_context:
        context_str = f"Previous conversation:\n{conversation_context}"

    # Bound before the try so the except branch can always report it, even
    # when the chain call itself raises before assigning anything.
    result = None

    try:
        # Invoke chain
        result = intent_chain.invoke({
            "query": query,
            "context": context_str
        })

        # The parser can yield a non-dict (e.g. a list or None) for odd LLM output
        if not isinstance(result, dict):
            raise TypeError(
                f"Expected a JSON object from the LLM, got {type(result).__name__}"
            )

        # Ensure all required fields exist with defaults
        result.setdefault("confidence", 0.7)
        result.setdefault("reasoning", "Classification based on query analysis")
        result.setdefault("entities", {})
        result.setdefault("needs_clarification", False)
        result.setdefault("clarification_question", None)

        return IntentClassification(**result)

    except Exception as e:
        print(f"  Classification error: {e}")
        print(f" Raw result: {result}")

        # Fallback: keep whatever fields are usable, default the rest
        return IntentClassification(**_salvage_classification_fields(result))

print(" Intent classifier function ready (with error handling)")

 Intent classifier function ready (with error handling)


### 6. Test Intent Classification


In [8]:
# Test 1: Clear disease query
query1 = "My citrus leaves are showing yellow blotchy patches. What could this be?"
result1 = classify_intent(query1)

# Collect the per-field report lines first, then emit them in one go
report_lines_1 = [
    f"   Intent: {result1.intent}",
    f"   Confidence: {result1.confidence:.2f}",
    f"   Reasoning: {result1.reasoning}",
    f"   Entities: {result1.entities}",
    f"   Needs clarification: {result1.needs_clarification}",
]

print(" Query:", query1)
print(f"\n Classification Result:")
print("\n".join(report_lines_1))

 Query: My citrus leaves are showing yellow blotchy patches. What could this be?

 Classification Result:
   Intent: disease
   Confidence: 0.90
   Reasoning: The query mentions a specific symptom (yellow blotchy patches on citrus leaves) which is likely related to a disease or pest issue
   Entities: {'diseases': ['citrus canker'], 'schemes': [], 'locations': []}
   Needs clarification: False


In [9]:
# Test 2: Clear scheme query
query2 = "What government schemes are available for citrus farmers in Andhra Pradesh?"
result2 = classify_intent(query2)

# Collect the per-field report lines first, then emit them in one go
report_lines_2 = [
    f"   Intent: {result2.intent}",
    f"   Confidence: {result2.confidence:.2f}",
    f"   Reasoning: {result2.reasoning}",
    f"   Entities: {result2.entities}",
    f"   Needs clarification: {result2.needs_clarification}",
]

print("üîç Query:", query2)
print(f"\n Classification Result:")
print("\n".join(report_lines_2))

üîç Query: What government schemes are available for citrus farmers in Andhra Pradesh?

 Classification Result:
   Intent: scheme
   Confidence: 0.90
   Reasoning: The query specifically asks about government schemes for citrus farmers in a particular location, Andhra Pradesh, without mentioning any disease or pest issues.
   Entities: {'diseases': [], 'schemes': ['government schemes'], 'locations': ['Andhra Pradesh']}
   Needs clarification: False


In [10]:
# Test 3: Hybrid query
query3 = "What government schemes can help me manage Citrus Greening disease in my farm?"
result3 = classify_intent(query3)

# Collect the per-field report lines first, then emit them in one go
report_lines_3 = [
    f"   Intent: {result3.intent}",
    f"   Confidence: {result3.confidence:.2f}",
    f"   Reasoning: {result3.reasoning}",
    f"   Entities: {result3.entities}",
    f"   Needs clarification: {result3.needs_clarification}",
]

print("üîç Query:", query3)
print(f"\n Classification Result:")
print("\n".join(report_lines_3))

üîç Query: What government schemes can help me manage Citrus Greening disease in my farm?

 Classification Result:
   Intent: hybrid
   Confidence: 0.90
   Reasoning: The query mentions both a disease (Citrus Greening) and government schemes, which aligns with the hybrid intent.
   Entities: {'diseases': ['Citrus Greening'], 'schemes': [], 'locations': []}
   Needs clarification: False


In [11]:
# Test 4: Unclear/vague query
query4 = "Tell me about citrus"
result4 = classify_intent(query4)

# Core fields are always reported
report_lines_4 = [
    f"   Intent: {result4.intent}",
    f"   Confidence: {result4.confidence:.2f}",
    f"   Reasoning: {result4.reasoning}",
    f"   Needs clarification: {result4.needs_clarification}",
]

# The follow-up question section is appended only when the model supplied one
if result4.clarification_question:
    report_lines_4.append(f"\nüí¨ Clarification Question:")
    report_lines_4.append(f"   {result4.clarification_question}")

print("üîç Query:", query4)
print(f"\nüìä Classification Result:")
print("\n".join(report_lines_4))

üîç Query: Tell me about citrus

üìä Classification Result:
   Intent: unclear
   Confidence: 0.80
   Reasoning: The query is too general and does not specify a particular aspect of citrus farming.
   Needs clarification: True

üí¨ Clarification Question:
   What specific aspect of citrus farming would you like to know about, such as diseases, cultivation, or government schemes?


### 7. Test Context-Aware Classification


In [12]:
# Simulate conversation context from previous conversation
conversation_context = """USER: My citrus leaves are showing yellow blotchy patches
ASSISTANT: This could be Huanglongbing (HLB) disease. Symptoms include asymmetrical mottling on leaves."""

# Follow-up query (pronoun reference - "it")
follow_up_query = "How do I prevent it?"

# First pass: the classifier sees the earlier exchange
result_context = classify_intent(follow_up_query, conversation_context)

with_context_lines = [
    "Previous Context:",
    conversation_context,
    f"\n Follow-up Query: '{follow_up_query}'",
    f"\n Classification with Context:",
    f"   Intent: {result_context.intent}",
    f"   Confidence: {result_context.confidence:.2f}",
    f"   Reasoning: {result_context.reasoning}",
]
print("\n".join(with_context_lines))

# Second pass: same query, no history, for comparison
result_no_context = classify_intent(follow_up_query)

without_context_lines = [
    f"\n Classification WITHOUT Context:",
    f"   Intent: {result_no_context.intent}",
    f"   Confidence: {result_no_context.confidence:.2f}",
    f"   Reasoning: {result_no_context.reasoning}",
]
print("\n".join(without_context_lines))

Previous Context:
USER: My citrus leaves are showing yellow blotchy patches
ASSISTANT: This could be Huanglongbing (HLB) disease. Symptoms include asymmetrical mottling on leaves.

 Follow-up Query: 'How do I prevent it?'

 Classification with Context:
   Intent: disease
   Confidence: 0.90
   Reasoning: The user is asking for prevention methods, which is directly related to disease management, following the previous conversation about Huanglongbing (HLB) disease.

 Classification WITHOUT Context:
   Intent: unclear
   Confidence: 0.80
   Reasoning: The query is too vague and does not specify what needs to be prevented.


### 8. Batch Testing & Accuracy Measurement


In [13]:
# Comprehensive test dataset with expected intents, grouped by expected label.
# Dict insertion order (disease, scheme, hybrid, unclear) fixes the order of
# the flattened list built below.
_queries_by_expected_intent = {
    "disease": [
        "What are the symptoms of citrus canker?",
        "How to control whitefly infestation?",
        "My leaves are turning yellow",
        "Treatment for citrus greening",
        "Pest control for citrus psyllid",
    ],
    "scheme": [
        "What subsidies are available for drip irrigation?",
        "How to apply for PMKSY scheme?",
        "Agricultural loan interest rates",
        "NHM scheme eligibility criteria",
        "Government support for organic farming",
    ],
    "hybrid": [
        "Government support for pest control equipment?",
        "Schemes for HLB disease management",
        "Financial help for replanting diseased trees",
        "Subsidy for disease resistant varieties",
        "Government assistance for citrus canker control",
    ],
    "unclear": [
        "Tell me about farming",
        "Help me",
        "What should I do?",
        "Citrus",
    ],
}

# Flatten into the (query, expected_intent) tuples the batch cells iterate over
test_queries = [
    (text, label)
    for label, texts in _queries_by_expected_intent.items()
    for text in texts
]

# Tally how many queries carry each expected label
_label_counts = {}
for _, _label in test_queries:
    _label_counts[_label] = _label_counts.get(_label, 0) + 1

print(f" Test Dataset: {len(test_queries)} queries")
print("\n Distribution:")
print(f"   ‚Ä¢ Disease: {_label_counts.get('disease', 0)} queries")
print(f"   ‚Ä¢ Scheme: {_label_counts.get('scheme', 0)} queries")
print(f"   ‚Ä¢ Hybrid: {_label_counts.get('hybrid', 0)} queries")
print(f"   ‚Ä¢ Unclear: {_label_counts.get('unclear', 0)} queries")

 Test Dataset: 19 queries

 Distribution:
   ‚Ä¢ Disease: 5 queries
   ‚Ä¢ Scheme: 5 queries
   ‚Ä¢ Hybrid: 5 queries
   ‚Ä¢ Unclear: 4 queries


In [14]:
from tqdm.auto import tqdm
import time

# Run classification on all test queries
results = []

print(" Running batch classification...\n")

for query, expected_intent in tqdm(test_queries, desc="Classifying"):
    # Fields shared by the success record and the error record
    record_base = {"query": query, "expected": expected_intent}

    try:
        result = classify_intent(query)
        results.append({
            **record_base,
            "predicted": result.intent,
            "confidence": result.confidence,
            "reasoning": result.reasoning,
            "correct": result.intent == expected_intent,
        })

        # Small delay to respect rate limits
        time.sleep(0.5)

    except Exception as e:
        print(f"\n  Error classifying '{query}': {e}")
        # Failed calls are kept as "error" rows with zero confidence
        results.append({
            **record_base,
            "predicted": "error",
            "confidence": 0.0,
            "reasoning": str(e),
            "correct": False,
        })

print(f"\n Batch classification complete: {len(results)} queries")

 Running batch classification...



Classifying:   0%|          | 0/19 [00:00<?, ?it/s]


 Batch classification complete: 19 queries


In [15]:
# Calculate overall accuracy
from collections import defaultdict

correct = sum(1 for r in results if r["correct"])
total = len(results)
# An empty results list would otherwise raise ZeroDivisionError
accuracy = (correct / total) * 100 if total else 0.0

# Rows whose classification call completed. "error" rows carry a placeholder
# confidence of 0.0 and are excluded from confidence and per-category stats.
scored_results = [r for r in results if r["predicted"] != "error"]
failed_count = total - len(scored_results)

# Per-category metrics ("avg_confidence" holds the raw list; it is averaged at print time)
category_stats = defaultdict(lambda: {"correct": 0, "total": 0, "avg_confidence": []})

for r in scored_results:
    expected = r["expected"]
    category_stats[expected]["total"] += 1
    category_stats[expected]["avg_confidence"].append(r["confidence"])
    if r["correct"]:
        category_stats[expected]["correct"] += 1

print("="*70)
print(" CLASSIFICATION ACCURACY REPORT")
print("="*70)

print(f"\n Overall Metrics:")
print(f"   Accuracy: {accuracy:.1f}% ({correct}/{total})")
# Guarded: if every row errored there is nothing to average
avg_conf = (
    sum(r["confidence"] for r in scored_results) / len(scored_results)
    if scored_results else 0.0
)
print(f"   Average Confidence: {avg_conf:.2f}")
if failed_count:
    # Overall accuracy counts errored rows as wrong, while the per-category
    # table below omits them; say so explicitly so the numbers reconcile.
    print(f"   Errors: {failed_count} (counted as incorrect above, excluded from per-category stats)")

print(f"\n Per-Category Performance:")
print("-"*70)
for category in sorted(category_stats.keys()):
    stats = category_stats[category]
    cat_accuracy = (stats["correct"] / stats["total"]) * 100 if stats["total"] > 0 else 0
    avg_cat_conf = sum(stats["avg_confidence"]) / len(stats["avg_confidence"]) if stats["avg_confidence"] else 0

    print(f"   {category.upper():10} ‚Üí {cat_accuracy:5.1f}% ({stats['correct']}/{stats['total']})  |  Avg Confidence: {avg_cat_conf:.2f}")

# Show misclassifications (completed calls whose label differs from the expected one)
misclassified = [r for r in scored_results if not r["correct"]]
if misclassified:
    print(f"\n Misclassified Queries ({len(misclassified)}):")
    print("-"*70)
    for i, r in enumerate(misclassified, 1):
        print(f"\n   {i}. \"{r['query']}\"")
        print(f"      Expected: {r['expected']} | Got: {r['predicted']} (confidence: {r['confidence']:.2f})")
        print(f"      Reasoning: {r['reasoning'][:100]}...")

print("\n" + "="*70)

 CLASSIFICATION ACCURACY REPORT

 Overall Metrics:
   Accuracy: 94.7% (18/19)
   Average Confidence: 0.89

 Per-Category Performance:
----------------------------------------------------------------------
   DISEASE    ‚Üí 100.0% (5/5)  |  Avg Confidence: 0.91
   HYBRID     ‚Üí  80.0% (4/5)  |  Avg Confidence: 0.90
   SCHEME     ‚Üí 100.0% (5/5)  |  Avg Confidence: 0.90
   UNCLEAR    ‚Üí 100.0% (4/4)  |  Avg Confidence: 0.85

 Misclassified Queries (1):
----------------------------------------------------------------------

   1. "Government support for pest control equipment?"
      Expected: hybrid | Got: scheme (confidence: 0.90)
      Reasoning: The query is about government support for pest control equipment, which falls under schemes and subs...



### 9. Clarification Question Generation


In [16]:
def classify_with_clarification(
    query: str,
    conversation_context: Optional[str] = None,
    confidence_threshold: float = 0.7
) -> Dict:
    """
    Classify a query and recommend whether to proceed or ask a follow-up.

    Args:
        query: User's query
        conversation_context: Previous conversation history
        confidence_threshold: Minimum confidence to proceed without clarification

    Returns:
        Dict with the keys intent, confidence, reasoning, entities, action
        ("proceed" or "clarify") and clarification_question (None unless the
        action is "clarify")
    """
    classification = classify_intent(query, conversation_context)

    # Start from the optimistic outcome; downgraded below if needed
    response = {
        "intent": classification.intent,
        "confidence": classification.confidence,
        "reasoning": classification.reasoning,
        "entities": classification.entities,
        "action": "proceed",
        "clarification_question": None
    }

    # Guard clause: nothing flags the query as ambiguous, so proceed as-is
    if not (
        classification.intent == "unclear"
        or classification.confidence < confidence_threshold
        or classification.needs_clarification
    ):
        return response

    response["action"] = "clarify"

    # Prefer the LLM's own follow-up question; otherwise use the canned one
    generic_question = (
        "I want to help you better! Could you please specify:\n\n"
        "1Ô∏è‚É£ Are you asking about disease symptoms or pest problems?\n"
        "2Ô∏è‚É£ Are you looking for government schemes or subsidies?\n"
        "3Ô∏è‚É£ Do you need both disease management advice AND financial support?\n\n"
        "Please let me know which one applies to your situation."
    )
    response["clarification_question"] = (
        classification.clarification_question or generic_question
    )

    return response

print(" Enhanced classifier with clarification logic ready")
print("   Confidence threshold: 0.70")

 Enhanced classifier with clarification logic ready
   Confidence threshold: 0.70


In [17]:
# Test with various ambiguous queries
ambiguous_queries = [
    "help me with my farm",
    "what about citrus?",
    "I need information",
    "tell me more",
    "schemes"  # Too vague
]

print(" Testing Clarification Flow")
print("="*70)

for query in ambiguous_queries:
    print(f"\nüîç Query: \"{query}\"")
    print("-"*70)

    response = classify_with_clarification(query)

    # Summary fields first; the question block follows only for "clarify"
    summary_lines = [
        f"   Intent: {response['intent']}",
        f"   Confidence: {response['confidence']:.2f}",
        f"   Action: {response['action'].upper()}",
    ]
    if response['action'] == 'clarify':
        summary_lines.append(f"\n    Clarification Question:")
        summary_lines.append(f"   {response['clarification_question']}")

    print("\n".join(summary_lines))

print("\n" + "="*70)

 Testing Clarification Flow

üîç Query: "help me with my farm"
----------------------------------------------------------------------
   Intent: unclear
   Confidence: 0.80
   Action: CLARIFY

    Clarification Question:
   What specific issue are you facing with your citrus farm, such as disease management or financial assistance?

üîç Query: "what about citrus?"
----------------------------------------------------------------------
   Intent: unclear
   Confidence: 0.80
   Action: CLARIFY

    Clarification Question:
   Could you please provide more details about what you would like to know about citrus, such as diseases, cultivation, or government schemes?

üîç Query: "I need information"
----------------------------------------------------------------------
   Intent: unclear
   Confidence: 0.90
   Action: CLARIFY

    Clarification Question:
   Could you please provide more details about what kind of information you are looking for regarding citrus farming?

üîç Query: "tell m

### 10. Integration with MongoDB Context


In [18]:
from pymongo import MongoClient

# Connect to MongoDB (from Phase 3)
# Uses MONGODB_URI from the environment, falling back to a local instance.
MONGODB_URI = os.getenv("MONGODB_URI", "mongodb://localhost:27017")

try:
    # NOTE(review): serverSelectionTimeoutMS=3000 presumably caps server
    # selection at about 3 seconds so an unreachable server fails fast;
    # confirm against the PyMongo documentation.
    client = MongoClient(MONGODB_URI, serverSelectionTimeoutMS=3000)
    # Send a ping command up front; if it raises, control goes to the except
    # branch below and the cell reports the connection as failed.
    client.admin.command('ping')
    
    # Database and collection names are hard-coded here.
    db = client["agri_chatbot"]
    conversations = db["conversations"]
    
    print(" MongoDB connected")
    
    # Check for existing conversations
    conv_count = conversations.count_documents({})
    print(f"   Found {conv_count} conversations")
    
except Exception as e:
    # Any failure above (connect, ping, or count) lands here. `conversations`
    # is set to None, which get_conversation_context and the demo cell check
    # before touching the database.
    print(f"  MongoDB connection failed: {e}")
    print("   Phase 10 will work with sample data only")
    conversations = None

 MongoDB connected
   Found 4 conversations


In [26]:
def get_conversation_context(session_id: str, max_messages: int = 4) -> Optional[str]:
    """
    Retrieve conversation context from MongoDB

    Reads the module-level `conversations` collection, takes the most recent
    messages of the matching session and formats each one as "ROLE: content",
    one per line.

    Args:
        session_id: Session ID to retrieve
        max_messages: Maximum number of recent messages to include; a value
            of 0 or less yields no context

    Returns:
        Formatted context string, or None when the database is unavailable,
        the session is unknown or empty, no messages were requested, or no
        message has both a role and a content
    """

    # No database handle, or the caller asked for zero messages. The explicit
    # check matters because a slice of [-0:] would return the ENTIRE history.
    if conversations is None or max_messages <= 0:
        return None

    conversation = conversations.find_one({"session_id": session_id})

    if not conversation or not conversation.get("messages"):
        return None

    # Get last N messages
    messages = conversation["messages"][-max_messages:]

    # Format as context, skipping malformed entries instead of raising KeyError
    context_parts = []
    for msg in messages:
        if not isinstance(msg, dict):
            continue
        role = msg.get("role")
        content = msg.get("content")
        if not role or content is None:
            continue
        context_parts.append(f"{str(role).upper()}: {content}")

    return "\n".join(context_parts) if context_parts else None

print(" Context retrieval function ready")

 Context retrieval function ready


In [27]:
# Test with real conversation data from MongoDB
if conversations is None:
    # The connection cell could not reach the database
    print("  MongoDB not available")
    print("   Run Phase 3 notebook to create test conversations")
else:
    # Get a sample session
    sample_conv = conversations.find_one({})

    if not sample_conv:
        print(" No conversations found in MongoDB")
    else:
        session_id = sample_conv["session_id"]
        context = get_conversation_context(session_id)

        banner = "="*70
        print("üìñ Real Conversation Context from MongoDB:")
        print(banner)
        print(context)
        print(banner)

        # Test follow-up query
        follow_up = "Are there any subsidies for this?"

        print(f"\n Follow-up Query: \"{follow_up}\"")

        result = classify_with_clarification(follow_up, context)

        outcome_lines = [
            f"\n Classification Result:",
            f"   Intent: {result['intent']}",
            f"   Confidence: {result['confidence']:.2f}",
            f"   Reasoning: {result['reasoning']}",
            f"   Action: {result['action']}",
        ]
        # The question is shown only when the classifier asks for clarification
        if result['action'] == 'clarify':
            outcome_lines.append(f"\n Clarification:")
            outcome_lines.append(f"   {result['clarification_question']}")

        print("\n".join(outcome_lines))

üìñ Real Conversation Context from MongoDB:
USER: How do I prevent it?
ASSISTANT: Prevention includes: 1) Use disease-free nursery stock, 2) Apply copper sprays, 3) Control citrus leafminer, 4) Plant windbreaks.
USER: Are there any government schemes for disease management?
ASSISTANT: Yes, the National Horticulture Mission provides assistance for disease management and replanting with certified material.

 Follow-up Query: "Are there any subsidies for this?"

 Classification Result:
   Intent: scheme
   Confidence: 0.90
   Reasoning: The query is asking about subsidies, which is directly related to government schemes and financial assistance.
   Action: proceed


In [28]:
# Per-category accuracy table, built separately to keep the config literal flat.
# A category with no scored queries reports "N/A" instead of dividing by zero.
_per_category_summary = {
    name: {
        "accuracy": f"{(bucket['correct'] / bucket['total'] * 100):.1f}%" if bucket['total'] > 0 else "N/A",
        "total": bucket['total']
    }
    for name, bucket in category_stats.items()
}

# Headline numbers from the batch evaluation cells
_test_results_summary = {
    "total_queries": len(test_queries),
    "accuracy": f"{accuracy:.1f}%",
    "avg_confidence": f"{avg_conf:.2f}",
    "per_category": _per_category_summary
}

phase4_config = {
    "llm_provider": "groq",
    "llm_model": "llama-3.3-70b-versatile",
    "temperature": 0,
    "intent_types": [member.value for member in IntentType],
    "confidence_threshold": 0.7,
    "max_context_messages": 4,
    "test_results": _test_results_summary,
    "features": [
        "intent_classification",
        "clarification_generation",
        "context_aware_classification",
        "entity_extraction",
        "mongodb_integration"
    ],
    "created_at": datetime.now().isoformat()
}

# Save config
config_path = project_root / "phase4_config.json"
_serialized_config = json.dumps(phase4_config, indent=2)
with open(config_path, 'w') as f:
    f.write(_serialized_config)

print(" Phase 4 config saved")
print(f"   Location: {config_path}")
print("\n Configuration Summary:")
print(_serialized_config)

 Phase 4 config saved
   Location: /Users/kaushik003/Documents/projects/agri-chatbot/phase4_config.json

 Configuration Summary:
{
  "llm_provider": "groq",
  "llm_model": "llama-3.3-70b-versatile",
  "temperature": 0,
  "intent_types": [
    "disease",
    "scheme",
    "hybrid",
    "unclear"
  ],
  "confidence_threshold": 0.7,
  "max_context_messages": 4,
  "test_results": {
    "total_queries": 19,
    "accuracy": "94.7%",
    "avg_confidence": "0.89",
    "per_category": {
      "disease": {
        "accuracy": "100.0%",
        "total": 5
      },
      "scheme": {
        "accuracy": "100.0%",
        "total": 5
      },
      "hybrid": {
        "accuracy": "80.0%",
        "total": 5
      },
      "unclear": {
        "accuracy": "100.0%",
        "total": 4
      }
    }
  },
  "features": [
    "intent_classification",
    "clarification_generation",
    "context_aware_classification",
    "entity_extraction",
    "mongodb_integration"
  ],
  "created_at": "2026-01-05T05:37