<a href="https://colab.research.google.com/github/kibet14isaac/AI-TOOLS/blob/main/AGRicu_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# src/data_preprocessing.py
import pandas as pd
import numpy as np
import re
from transformers import AutoTokenizer

def clean_text(text):
    """Normalize a raw query string for downstream NLU.

    The text is lower-cased and every character that is not an ASCII letter,
    a digit, or whitespace is removed (not replaced by a space).

    Args:
        text: The raw query. ``None`` and float NaN (how pandas represents an
            empty CSV cell) are treated as an empty string; any other
            non-string value is converted with ``str()`` first.

    Returns:
        The cleaned string; ``""`` for missing input.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    text = str(text).lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)  # Remove punctuation
    # Add more cleaning specific to local dialects/slang
    return text

def load_and_process_queries(filepath):
    """Read a CSV of queries and attach a normalized copy of each query.

    Args:
        filepath: Location of the CSV file, passed straight to
            ``pandas.read_csv``. The file must contain a 'query' column
            (the original note also expects 'intent' and 'entities'
            columns, which this function does not touch).

    Returns:
        The loaded DataFrame with an extra 'cleaned_query' column holding
        ``clean_text`` applied to every 'query' value.
    """
    queries = pd.read_csv(filepath)
    raw_queries = queries['query']
    queries['cleaned_query'] = raw_queries.apply(clean_text)
    # Further processing: tokenization, intent/entity labeling
    return queries

def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image from disk and turn it into a normalized RGB array.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        target_size: Size handed to ``Image.resize``. Pillow interprets it as
            ``(width, height)``, while the returned array is laid out as
            ``(height, width, 3)``; the two only coincide for square sizes.

    Returns:
        A NumPy array of the resized RGB image with pixel values divided by
        255.0.
    """
    # Local import keeps Pillow out of the module-level import list.
    from PIL import Image

    # The context manager closes the underlying file deterministically.
    # convert() returns an independent in-memory image, so the result stays
    # valid after the source file is closed.
    with Image.open(image_path) as source:
        resized = source.convert('RGB').resize(target_size)
    return np.array(resized) / 255.0  # Normalize pixel values

# src/nlp_model.py
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import torch

class AgriculturalNLU:
    """Intent classifier for farmer text queries built on a Hugging Face model.

    Wraps a tokenizer and a sequence-classification model, and maps the
    predicted class index to a human-readable intent name.

    NOTE(review): no fine-tuned weights are loaded here (see the commented
    ``load_state_dict`` line), so predictions are presumably not meaningful
    until that is done.
    """

    def __init__(self, model_name="bert-base-uncased", num_intents=5, num_entities=10,
                 intent_labels=None):
        """Load the tokenizer and the intent-classification model.

        Args:
            model_name: Hugging Face model identifier or local path.
            num_intents: Number of intent classes for the classification head.
            num_entities: Number of entity classes; currently unused because
                the NER model is not instantiated yet.
            intent_labels: Optional sequence of intent names where position
                ``i`` names class ``i``. When omitted, names come from the
                model config's ``id2label`` mapping.

        Raises:
            ValueError: If ``intent_labels`` is given but its length differs
                from ``num_intents``.
        """
        if intent_labels is not None:
            intent_labels = list(intent_labels)
            if len(intent_labels) != num_intents:
                raise ValueError(
                    f"intent_labels has {len(intent_labels)} entries but "
                    f"num_intents is {num_intents}"
                )
        self.intent_labels = intent_labels

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # For intent classification
        self.intent_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_intents)
        # For entity recognition (NER) - requires a different head
        # self.entity_model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=num_entities)
        # Load fine-tuned weights here if available
        # self.intent_model.load_state_dict(torch.load("models/intent_model.pth"))

    def predict_intent(self, text):
        """Return the predicted intent class index for a single query.

        Args:
            text: One query string. Batches are not supported because the
                result is reduced to a single Python int.

        Returns:
            The integer index of the highest-scoring intent; pass it to
            ``get_intent_name`` to obtain the label.
        """
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():  # inference only, no gradients needed
            outputs = self.intent_model(**inputs)
        # Reduce over the class axis explicitly rather than over the
        # flattened logits.
        return torch.argmax(outputs.logits, dim=-1).item()

    def get_intent_name(self, intent_id):
        """Map a class index from ``predict_intent`` to its intent name.

        Args:
            intent_id: Integer class index.

        Returns:
            The name from ``intent_labels`` when those were supplied,
            otherwise the entry of the model config's ``id2label`` mapping
            (e.g. "pest_identification", "fertilizer_advice").

        Raises:
            IndexError / KeyError: If ``intent_id`` has no associated label.
        """
        if self.intent_labels is not None:
            return self.intent_labels[intent_id]
        return self.intent_model.config.id2label[intent_id]

    # Add predict_entities method
    # Add a RAG component that integrates with a knowledge base

# src/cv_model.py
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

class PlantDiseaseClassifier:
    """Image classifier for plant diseases/pests on a ResNet50 backbone.

    The network is ResNet50 (ImageNet weights, no top) followed by global
    average pooling, a 1024-unit ReLU layer and a softmax output layer.
    """

    def __init__(self, num_classes=20, input_shape=(224, 224, 3), class_names=None):
        """Build the Keras model.

        Args:
            num_classes: Size of the softmax output layer.
            input_shape: Input shape passed to ResNet50.
            class_names: Optional sequence where position ``i`` is the name of
                class ``i`` (e.g. a knowledge-base key such as
                "maize_armyworm"). When omitted, ``get_disease_name`` returns
                the generic name ``"class_<index>"``.

        Raises:
            ValueError: If ``class_names`` is given but its length differs
                from ``num_classes``.
        """
        if class_names is not None:
            class_names = list(class_names)
            if len(class_names) != num_classes:
                raise ValueError(
                    f"class_names has {len(class_names)} entries but "
                    f"num_classes is {num_classes}"
                )
        self.class_names = class_names

        base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(1024, activation='relu')(x)
        predictions = Dense(num_classes, activation='softmax')(x)
        self.model = Model(inputs=base_model.input, outputs=predictions)
        # Load fine-tuned weights here
        # self.model.load_weights("models/cv_model_weights.h5")

    def train(self, train_data, val_data, epochs=10, batch_size=32):
        """Compile the model (Adam, categorical cross-entropy) and fit it.

        Args:
            train_data: Training data passed to ``Model.fit``.
            val_data: Validation data passed as ``validation_data``.
            epochs: Number of training epochs.
            batch_size: Batch size forwarded to ``Model.fit``.
                NOTE(review): Keras rejects ``batch_size`` for dataset or
                generator inputs - confirm what callers pass here.
        """
        self.model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        self.model.fit(train_data, validation_data=val_data, epochs=epochs, batch_size=batch_size)

    def predict(self, image_array):
        """Return class probabilities for one preprocessed image.

        Args:
            image_array: A single image, already resized and normalized; a
                leading batch axis is added before calling the model.

        Returns:
            The output of ``Model.predict`` for the one-image batch.
        """
        # image_array should be preprocessed (resized, normalized)
        return self.model.predict(np.expand_dims(image_array, axis=0))  # Returns probabilities

    def get_disease_name(self, prediction):
        """Translate the output of ``predict`` into a class name.

        Args:
            prediction: Class scores, either one row of scores or a batch;
                for a batch only the first row is considered.

        Returns:
            The entry of ``class_names`` for the highest-scoring class, or
            ``"class_<index>"`` when no class names were supplied.
        """
        scores = np.asarray(prediction)
        if scores.ndim > 1:
            scores = scores[0]
        best_index = int(np.argmax(scores))
        if self.class_names is None:
            return f"class_{best_index}"
        return self.class_names[best_index]

# src/chatbot_logic.py
from src.nlp_model import AgriculturalNLU
from src.cv_model import PlantDiseaseClassifier
# Assuming a simple knowledge base for demonstration.
# Layout: intent name -> topic key -> canned advice text.
knowledge_base = dict(
    pest_identification=dict(
        maize_armyworm="Symptoms: Ragged holes in leaves, faecal pellets. Control: Use 'Green Guard' pesticide or neem extract. See more at KALRO website.",
        potato_late_blight="Symptoms: Brown/black lesions on leaves and stems, white mold on underside. Control: Apply copper-based fungicide or resistant varieties. Check weather for humidity.",
    ),
    fertilizer_advice=dict(
        maize="For maize, use DAP at planting and CAN for top dressing. Soil test recommended for precise amounts.",
        kale="Kale needs nitrogen-rich fertilizer. Use urea or manure.",
    ),
    weather_forecast=dict(
        default="Please provide your county for a localized weather forecast.",
    ),
)

class AgriculturalChatbot:
    """Routes farmer queries (text or image) to canned knowledge-base advice."""

    def __init__(self):
        """Instantiate the NLU model and the plant-disease image classifier."""
        self.nlu = AgriculturalNLU()
        self.cv_classifier = PlantDiseaseClassifier()
        # Initialize knowledge base for RAG (e.g., a Faiss index or simple dict)

    def process_text_query(self, query, farmer_context=None):
        """Answer a text query based on its predicted intent.

        Args:
            query: The farmer's message. ``None``, non-string, or blank input
                yields the generic "please rephrase" reply without calling
                the model.
            farmer_context: Optional mapping with stored facts about the
                farmer; only the 'county' key is read (for weather queries).

        Returns:
            The reply text.
        """
        # A fresh dict per call; a ``{}`` default would be shared across calls.
        farmer_context = {} if farmer_context is None else farmer_context

        response = "I'm sorry, I couldn't understand that. Could you please rephrase?"
        if not isinstance(query, str) or not query.strip():
            return response

        intent_id = self.nlu.predict_intent(query)
        # Map intent_id to intent_name; the NLU object is expected to provide
        # get_intent_name.
        intent_name = self.nlu.get_intent_name(intent_id)

        # Extract entities like crop, location, symptoms
        # entities = self.nlu.predict_entities(query)

        # Keyword matching is done case-insensitively so "Maize" matches too.
        normalized_query = query.lower()

        if intent_name == "pest_identification":
            advice = self._find_pest_advice(normalized_query)
            if advice is not None:
                response = advice
            else:
                response = "Please describe the symptoms or upload an image for pest/disease identification."
        elif intent_name == "fertilizer_advice":
            advice = self._find_fertilizer_advice(normalized_query)
            if advice is not None:
                response = advice
            else:
                response = "Which crop are you asking about for fertilizer?"
        elif intent_name == "weather_forecast":
            # Call weather API based on farmer's stored location or extracted entity
            # For this example, just a generic response
            if 'county' in farmer_context:
                response = f"Fetching weather for {farmer_context['county']}... (placeholder for API call)"
            else:
                response = knowledge_base["weather_forecast"]["default"]

        # Implement RAG logic: Retrieve relevant docs from knowledge base and pass to LLM for coherent response.
        # This is where the LLM integration would happen, synthesizing info.

        return response

    @staticmethod
    def _find_pest_advice(normalized_query):
        """Return advice for the first pest key whose words all occur in the query, else None."""
        for pest_key, advice in knowledge_base["pest_identification"].items():
            # "maize_armyworm" matches when both "maize" and "armyworm" appear.
            if all(word in normalized_query for word in pest_key.split("_")):
                return advice
        return None

    @staticmethod
    def _find_fertilizer_advice(normalized_query):
        """Return advice for the first known crop mentioned in the query, else None."""
        for crop, advice in knowledge_base["fertilizer_advice"].items():
            if crop in normalized_query:
                return advice
        return None

    def process_image_query(self, image_array):
        """Classify an image and return the matching knowledge-base advice.

        Args:
            image_array: A preprocessed image passed to the classifier's
                ``predict`` method.

        Returns:
            A sentence naming the detected disease/pest followed by its
            advice, or a referral to an extension officer when the knowledge
            base has no entry for it.
        """
        prediction = self.cv_classifier.predict(image_array)
        # Map prediction to disease/pest name; the classifier is expected to
        # provide get_disease_name.
        disease_name = self.cv_classifier.get_disease_name(prediction)
        advice = knowledge_base["pest_identification"].get(disease_name, "Could not find specific advice for this. Please consult an extension officer.")
        return f"Based on the image, it appears to be: {disease_name}. Advice: {advice}"

# src/api.py (Example using Flask or FastAPI for WhatsApp/SMS integration)
from flask import Flask, request, jsonify

# Flask application object that the route decorators below register against.
app = Flask(__name__)
# Single chatbot shared by all requests. It is constructed at import time, and
# AgriculturalChatbot.__init__ builds both the NLU model and the image
# classifier, so importing this module triggers model loading.
chatbot = AgriculturalChatbot()

@app.route('/webhook', methods=['POST'])
def webhook():
    """Handle an incoming message and reply with JSON.

    Expects a JSON object with a 'type' field ('text' or 'image'). Text
    messages carry their content under ``text.body``. The exact schema will
    vary based on the WhatsApp Business API/Twilio integration.

    Returns:
        ``{"reply": ...}`` on success, or ``{"error": ...}`` with HTTP 400
        when the body is not a JSON object or a text message has no body.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # aborting, so every malformed request gets the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    # Extract message and sender ID
    sender_id = data.get('from')  # reserved for the farmer-context lookup below
    message_type = data.get('type')  # 'text' or 'image'

    if message_type == 'text':
        text_payload = data.get('text')
        user_message = text_payload.get('body') if isinstance(text_payload, dict) else None
        if not isinstance(user_message, str) or not user_message.strip():
            return jsonify({"error": "Text message is missing a body."}), 400
        # Simulate farmer context from a database
        farmer_context = {"county": "Nakuru"}  # Load from DB based on sender_id
        response = chatbot.process_text_query(user_message, farmer_context)
    elif message_type == 'image':
        # Download image (URL expected under image.url), preprocess it, then
        # pass to CV model. For simplicity, skipping download and processing here
        # image_array = preprocess_image_from_url(image_url)
        # response = chatbot.process_image_query(image_array)
        response = "Image analysis feature coming soon! Please describe the symptoms for now."
    else:
        response = "I can only process text and images at the moment."

    # Send response back via WhatsApp Business API / Twilio
    # This involves making a POST request to their API
    # For now, just return it as JSON for demonstration
    return jsonify({"reply": response})

if __name__ == '__main__':
    # For development, run with Flask's built-in server.
    import os

    # Debug mode enables the interactive Werkzeug debugger, which allows
    # arbitrary code execution from the browser. A webhook has to be reachable
    # from outside, so debug is opt-in via FLASK_DEBUG=1 rather than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled, port=5000)
    # For production, use Gunicorn or similar WSGI server