In [1]:
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
import warnings
warnings.filterwarnings('ignore')

load_dotenv()
project_root = Path.cwd().parent if 'notebooks' in str(Path.cwd()) else Path.cwd()
sys.path.insert(0, str(project_root))

print(" Environment loaded")

 Environment loaded


In [2]:
import json
from typing import List, Dict, Optional
from datetime import datetime
import uuid

# MongoDB
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure

print(" All imports successful")

 All imports successful


### 1. Connect to MongoDB

In [4]:
# Get MongoDB URI from environment
MONGODB_URI = os.getenv("MONGODB_URI", "mongodb://localhost:27017")

print(f" Connecting to MongoDB...")
print(f"   URI: {MONGODB_URI[:30]}..." if len(MONGODB_URI) > 30 else f"   URI: {MONGODB_URI}")

try:
    # Connect to MongoDB
    client = MongoClient(MONGODB_URI, serverSelectionTimeoutMS=5000)
    
    # Test connection
    client.admin.command('ping')
    
    print(" MongoDB connected successfully!")
    
    # List existing databases
    db_list = client.list_database_names()
    print(f"Existing databases: {db_list[:5]}")  # Show first 5
    
except ConnectionFailure as e:
    print(f" MongoDB connection failed: {e}")
    print("\n  Make sure MongoDB is running or check your connection string")

 Connecting to MongoDB...
   URI: mongodb+srv://koushikmaram17_d...
 MongoDB connected successfully!
Existing databases: ['sample_mflix', 'admin', 'local']


In [5]:
# Database and collection names
DB_NAME = "agri_chatbot"
CONVERSATIONS_COLLECTION = "conversations"
SESSIONS_COLLECTION = "sessions"

# Connect to database
db = client[DB_NAME]

# Get or create collections
conversations = db[CONVERSATIONS_COLLECTION]
sessions = db[SESSIONS_COLLECTION]

print(f"Database: {DB_NAME}")
print(f"Collections created:")
print(f"   - {CONVERSATIONS_COLLECTION}")
print(f"   - {SESSIONS_COLLECTION}")

Database: agri_chatbot
Collections created:
   - conversations
   - sessions


### 2. Design Conversation Schema

In [6]:
def create_session_id() -> str:
    """Generate unique session ID"""
    return str(uuid.uuid4())

def create_conversation(session_id: str, user_id: str = "default_user") -> Dict:
    """Create new conversation document"""
    return {
        "session_id": session_id,
        "user_id": user_id,
        "messages": [],
        "context_summary": "",
        "last_intent": None,
        "query_count": 0,
        "created_at": datetime.utcnow(),
        "updated_at": datetime.utcnow()
    }

def add_message(
    session_id: str,
    role: str,
    content: str,
    intent: Optional[str] = None,
    sources: Optional[List[str]] = None,
    metadata: Optional[Dict] = None
) -> Dict:
    """Create a message object"""
    message = {
        "timestamp": datetime.utcnow(),
        "role": role,  # "user" or "assistant"
        "content": content,
    }
    
    if role == "assistant":
        message["intent"] = intent
        message["sources"] = sources or []
        message["metadata"] = metadata or {}
    
    return message

print("Schema helper functions ready")

Schema helper functions ready


### 3. CRUD Operations

In [None]:
def save_conversation(
    session_id: str,
    user_message: str,
    assistant_message: str,
    intent: str,
    sources: List[str] = None,
    confidence: float = 0.0
):
    """Save or update conversation in MongoDB"""
    
    # Check if conversation exists
    conversation = conversations.find_one({"session_id": session_id})
    
    if not conversation:
        # Create new conversation
        conversation = create_conversation(session_id)
        conversations.insert_one(conversation)
        print(f" Created new conversation: {session_id}")
    
    # Add user message
    user_msg = add_message(session_id, "user", user_message)
    
    # Add assistant message
    assistant_msg = add_message(
        session_id, 
        "assistant", 
        assistant_message,
        intent=intent,
        sources=sources or [],
        metadata={"confidence": confidence}
    )
    
    # Update conversation
    conversations.update_one(
        {"session_id": session_id},
        {
            "$push": {
                "messages": {"$each": [user_msg, assistant_msg]}
            },
            "$set": {
                "last_intent": intent,
                "updated_at": datetime.utcnow()
            },
            "$inc": {
                "query_count": 1
            }
        }
    )
    
    print(f"Conversation updated: {session_id}")
    return session_id

print("Save function ready")

Save function ready


In [8]:
def get_conversation(session_id: str, last_n: int = None) -> Optional[Dict]:
    """Retrieve conversation history"""
    
    conversation = conversations.find_one({"session_id": session_id})
    
    if not conversation:
        return None
    
    # Get last N messages if specified
    if last_n and len(conversation.get("messages", [])) > last_n:
        conversation["messages"] = conversation["messages"][-last_n:]
    
    return conversation

print("Retrieve function ready")

Retrieve function ready


In [9]:
def delete_conversation(session_id: str) -> bool:
    """Delete conversation by session ID"""
    
    result = conversations.delete_one({"session_id": session_id})
    
    if result.deleted_count > 0:
        print(f" Deleted conversation: {session_id}")
        return True
    else:
        print(f" Conversation not found: {session_id}")
        return False

print("Delete function ready")

Delete function ready


In [10]:
def get_context(session_id: str, max_messages: int = 5) -> str:
    """Get formatted conversation context for LLM"""
    
    conversation = get_conversation(session_id, last_n=max_messages)
    
    if not conversation or not conversation.get("messages"):
        return ""
    
    # Format as conversation history
    context_parts = []
    for msg in conversation["messages"]:
        role = msg["role"].upper()
        content = msg["content"]
        context_parts.append(f"{role}: {content}")
    
    return "\n".join(context_parts)

print("Context function ready")

Context function ready


### 4. Test Conversation Storage

In [11]:
# Create test session
test_session_id = create_session_id()

print(f" Testing conversation storage")
print(f" Session ID: {test_session_id}\n")

# Simulate first query
save_conversation(
    session_id=test_session_id,
    user_message="What are the symptoms of citrus canker?",
    assistant_message="Citrus canker causes raised lesions on leaves, stems, and fruit. Symptoms include brown spots with yellow halos.",
    intent="disease",
    sources=["CitrusPlantPestsAndDiseases.pdf - Page 21"],
    confidence=0.85
)

# Retrieve and display
conversation = get_conversation(test_session_id)
print(f"\n Conversation Details:")
print(f"   Query count: {conversation['query_count']}")
print(f"   Last intent: {conversation['last_intent']}")
print(f"   Messages: {len(conversation['messages'])}")

 Testing conversation storage
 Session ID: 43e0be40-b075-4b0f-8f57-23a9385a826f

üìù Created new conversation: 43e0be40-b075-4b0f-8f57-23a9385a826f
Conversation updated: 43e0be40-b075-4b0f-8f57-23a9385a826f

 Conversation Details:
   Query count: 1
   Last intent: disease
   Messages: 2


In [12]:
# Add follow-up questions
print("\n Adding follow-up messages...\n")

# Second query
save_conversation(
    session_id=test_session_id,
    user_message="How do I prevent it?",
    assistant_message="Prevention includes: 1) Use disease-free nursery stock, 2) Apply copper sprays, 3) Control citrus leafminer, 4) Plant windbreaks.",
    intent="disease",
    sources=["CitrusPlantPestsAndDiseases.pdf - Page 22"],
    confidence=0.78
)

# Third query
save_conversation(
    session_id=test_session_id,
    user_message="Are there any government schemes for disease management?",
    assistant_message="Yes, the National Horticulture Mission provides assistance for disease management and replanting with certified material.",
    intent="hybrid",
    sources=["GovernmentSchemes.pdf - Page 15"],
    confidence=0.82
)

# Retrieve full conversation
conversation = get_conversation(test_session_id)
print(f" Conversation updated!")
print(f"   Total queries: {conversation['query_count']}")
print(f"   Total messages: {len(conversation['messages'])}")


 Adding follow-up messages...

Conversation updated: 43e0be40-b075-4b0f-8f57-23a9385a826f
Conversation updated: 43e0be40-b075-4b0f-8f57-23a9385a826f
 Conversation updated!
   Total queries: 3
   Total messages: 6


In [13]:
# Display formatted conversation
print("\n Full Conversation History:\n")
print("=" * 80)

for i, msg in enumerate(conversation['messages'], 1):
    role = " USER" if msg['role'] == "user" else " ASSISTANT"
    print(f"\n{role} ({msg['timestamp'].strftime('%H:%M:%S')}):")
    print(f"   {msg['content'][:150]}...")
    
    if msg['role'] == "assistant":
        print(f"   Intent: {msg.get('intent', 'N/A')}")
        print(f"   Sources: {len(msg.get('sources', []))} documents")

print("\n" + "=" * 80)


 Full Conversation History:


 USER (22:07:23):
   What are the symptoms of citrus canker?...

 ASSISTANT (22:07:23):
   Citrus canker causes raised lesions on leaves, stems, and fruit. Symptoms include brown spots with yellow halos....
   Intent: disease
   Sources: 1 documents

 USER (22:07:46):
   How do I prevent it?...

 ASSISTANT (22:07:46):
   Prevention includes: 1) Use disease-free nursery stock, 2) Apply copper sprays, 3) Control citrus leafminer, 4) Plant windbreaks....
   Intent: disease
   Sources: 1 documents

 USER (22:07:47):
   Are there any government schemes for disease management?...

 ASSISTANT (22:07:47):
   Yes, the National Horticulture Mission provides assistance for disease management and replanting with certified material....
   Intent: hybrid
   Sources: 1 documents



In [None]:
# Test getting last 3 messages as context
context = get_context(test_session_id, max_messages=4)

print("\n Context for LLM (last 4 messages):")
print("=" * 80)
print(context)
print("=" * 80)


üìù Context for LLM (last 4 messages):
USER: How do I prevent it?
ASSISTANT: Prevention includes: 1) Use disease-free nursery stock, 2) Apply copper sprays, 3) Control citrus leafminer, 4) Plant windbreaks.
USER: Are there any government schemes for disease management?
ASSISTANT: Yes, the National Horticulture Mission provides assistance for disease management and replanting with certified material.


### 5. Context Summarization

In [16]:
def summarize_context(session_id: str) -> str:
    """Generate simple context summary"""
    
    conversation = get_conversation(session_id)
    
    if not conversation or not conversation.get("messages"):
        return "No conversation history"
    
    messages = conversation['messages']
    query_count = conversation['query_count']
    last_intent = conversation['last_intent']
    
    # Count intents
    intents = {}
    for msg in messages:
        if msg['role'] == 'assistant' and msg.get('intent'):
            intent = msg['intent']
            intents[intent] = intents.get(intent, 0) + 1
    
    # Create summary
    summary = f"Conversation with {query_count} queries. "
    summary += f"Topics: {', '.join([f'{k} ({v})' for k, v in intents.items()])}. "
    summary += f"Last intent: {last_intent}."
    
    return summary

# Test summarization
summary = summarize_context(test_session_id)
print(f" Context Summary:")
print(f"   {summary}")

# Save summary to conversation
conversations.update_one(
    {"session_id": test_session_id},
    {"$set": {"context_summary": summary}}
)
print(f"\n Summary saved to database")

 Context Summary:
   Conversation with 3 queries. Topics: disease (2), hybrid (1). Last intent: hybrid.

 Summary saved to database


### 6. Session Management

In [17]:
def create_session_metadata(session_id: str, first_query: str):
    """Create session metadata document"""
    
    session_doc = {
        "session_id": session_id,
        "first_query": first_query,
        "intent_distribution": {},
        "total_queries": 0,
        "avg_confidence": 0.0,
        "created_at": datetime.utcnow(),
        "last_active": datetime.utcnow(),
        "is_active": True
    }
    
    sessions.insert_one(session_doc)
    return session_doc

def update_session_stats(session_id: str):
    """Update session statistics"""
    
    conversation = get_conversation(session_id)
    
    if not conversation:
        return
    
    # Calculate stats
    intents = {}
    confidences = []
    
    for msg in conversation['messages']:
        if msg['role'] == 'assistant':
            intent = msg.get('intent')
            if intent:
                intents[intent] = intents.get(intent, 0) + 1
            
            conf = msg.get('metadata', {}).get('confidence', 0)
            if conf > 0:
                confidences.append(conf)
    
    avg_conf = sum(confidences) / len(confidences) if confidences else 0.0
    
    # Update session
    sessions.update_one(
        {"session_id": session_id},
        {
            "$set": {
                "intent_distribution": intents,
                "total_queries": conversation['query_count'],
                "avg_confidence": avg_conf,
                "last_active": datetime.utcnow()
            }
        },
        upsert=True
    )

print("Session management functions ready")

Session management functions ready


In [18]:
# Create session metadata
session_metadata = create_session_metadata(
    test_session_id,
    "What are the symptoms of citrus canker?"
)

# Update with latest stats
update_session_stats(test_session_id)

# Retrieve session info
session = sessions.find_one({"session_id": test_session_id})

print(f" Session Statistics:")
print(f"   Session ID: {session['session_id'][:20]}...")
print(f"   Total queries: {session['total_queries']}")
print(f"   Intent distribution: {session['intent_distribution']}")
print(f"   Avg confidence: {session['avg_confidence']:.2f}")
print(f"   Created: {session['created_at'].strftime('%Y-%m-%d %H:%M:%S')}")

 Session Statistics:
   Session ID: 43e0be40-b075-4b0f-8...
   Total queries: 3
   Intent distribution: {'disease': 2, 'hybrid': 1}
   Avg confidence: 0.82
   Created: 2026-01-04 22:10:13


### 7. Test Complete Workflow

In [19]:
# Create 3 test sessions with different conversation patterns
print("Creating test sessions...\n")

test_sessions = []

# Session 1: Disease-focused
session1 = create_session_id()
save_conversation(session1, "My leaves are yellow", "This could be HLB disease...", "disease", confidence=0.75)
save_conversation(session1, "How to treat?", "Remove infected trees and control psyllids...", "disease", confidence=0.82)
test_sessions.append(session1)
print(f" Session 1: Disease-focused ({session1[:8]}...)")

# Session 2: Scheme-focused
session2 = create_session_id()
save_conversation(session2, "Drip irrigation subsidy?", "55% subsidy available under PMKSY...", "scheme", confidence=0.88)
save_conversation(session2, "How to apply?", "Apply through District Agriculture Office...", "scheme", confidence=0.90)
test_sessions.append(session2)
print(f" Session 2: Scheme-focused ({session2[:8]}...)")

# Session 3: Hybrid
session3 = create_session_id()
save_conversation(session3, "Citrus canker help", "Symptoms include lesions. Remove infected parts...", "disease", confidence=0.79)
save_conversation(session3, "Government support?", "NHM provides assistance for replanting...", "hybrid", confidence=0.85)
test_sessions.append(session3)
print(f" Session 3: Hybrid ({session3[:8]}...)")

print(f"\n Total test sessions: {len(test_sessions)}")

Creating test sessions...

üìù Created new conversation: 4a523ea2-aa7d-484e-b3a0-beead89fa147
Conversation updated: 4a523ea2-aa7d-484e-b3a0-beead89fa147
Conversation updated: 4a523ea2-aa7d-484e-b3a0-beead89fa147
 Session 1: Disease-focused (4a523ea2...)
üìù Created new conversation: df5dd9ef-6061-4f06-88e2-bc0881809c21
Conversation updated: df5dd9ef-6061-4f06-88e2-bc0881809c21
Conversation updated: df5dd9ef-6061-4f06-88e2-bc0881809c21
 Session 2: Scheme-focused (df5dd9ef...)
üìù Created new conversation: 8876eb7b-ef03-4cb5-9306-4e23ab68432f
Conversation updated: 8876eb7b-ef03-4cb5-9306-4e23ab68432f
Conversation updated: 8876eb7b-ef03-4cb5-9306-4e23ab68432f
 Session 3: Hybrid (8876eb7b...)

 Total test sessions: 3


In [21]:
# Get all sessions
all_sessions = list(sessions.find().sort("created_at", -1).limit(5))

print(f"\n Recent Sessions:\n")
print("=" * 80)

for i, sess in enumerate(all_sessions, 1):
    print(f"\n{i}. Session {sess['session_id'][:16]}...")
    print(f"   Queries: {sess.get('total_queries', 0)}")
    print(f"   Intents: {sess.get('intent_distribution', {})}")
    print(f"   Avg Confidence: {sess.get('avg_confidence', 0):.2f}")
    print(f"   Active: {sess.get('is_active', False)}")

print("\n" + "=" * 80)


 Recent Sessions:


1. Session 43e0be40-b075-4b...
   Queries: 3
   Intents: {'disease': 2, 'hybrid': 1}
   Avg Confidence: 0.82
   Active: True



In [22]:
# Get database statistics
total_conversations = conversations.count_documents({})
total_sessions = sessions.count_documents({})

# Calculate total messages
pipeline = [
    {"$project": {"message_count": {"$size": "$messages"}}},
    {"$group": {"_id": None, "total": {"$sum": "$message_count"}}}
]
result = list(conversations.aggregate(pipeline))
total_messages = result[0]['total'] if result else 0

print(f" MongoDB Statistics:")
print(f"   Database: {DB_NAME}")
print(f"   Total conversations: {total_conversations}")
print(f"   Total sessions: {total_sessions}")
print(f"   Total messages: {total_messages}")
print(f"   Avg messages/conversation: {total_messages/total_conversations:.1f}")

 MongoDB Statistics:
   Database: agri_chatbot
   Total conversations: 4
   Total sessions: 1
   Total messages: 18
   Avg messages/conversation: 4.5


In [23]:
phase3_config = {
    "mongodb_uri": MONGODB_URI.split('@')[0] + '@***',  # Hide credentials
    "database": DB_NAME,
    "collections": {
        "conversations": CONVERSATIONS_COLLECTION,
        "sessions": SESSIONS_COLLECTION
    },
    "features": [
        "conversation_storage",
        "multi_turn_context",
        "session_management",
        "context_summarization"
    ],
    "test_sessions": len(test_sessions),
    "created_at": datetime.utcnow().isoformat()
}

with open(project_root / "phase3_config.json", 'w') as f:
    json.dump(phase3_config, f, indent=2)

print(" Phase 3 config saved")
print(json.dumps(phase3_config, indent=2))

 Phase 3 config saved
{
  "mongodb_uri": "mongodb+srv://koushikmaram17_db_user:2sAb6Nb1qsfjTCLj@***",
  "database": "agri_chatbot",
  "collections": {
    "conversations": "conversations",
    "sessions": "sessions"
  },
  "features": [
    "conversation_storage",
    "multi_turn_context",
    "session_management",
    "context_summarization"
  ],
  "test_sessions": 3,
  "created_at": "2026-01-04T22:12:20.318817"
}
