In [1]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from typing import Dict, Any, List, Optional
import joblib
import numpy as np
import pandas as pd
from langgraph.graph import StateGraph, START, END
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
import os
from datetime import datetime
import logging
from dotenv import load_dotenv

# Import your preprocessing functions
from preprocessing import SoilDataPreprocessor

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


'   O'
'   o'
'  Zn'
'  Zn'

'    O'
'    O'

In [2]:
# LangGraph workflow state
class WorkflowState(BaseModel):
    soil_data: Dict[str, Any] = {}
    fertility_prediction: Optional[str] = None  # Changed from str = ""
    fertility_confidence: Optional[float] = None  # Changed from float = 0.0
    fertilizer_prediction: Optional[str] = None  # Changed from str = ""
    fertilizer_confidence: Optional[float] = None  # Changed from float = 0.0
    explanation: Optional[str] = None  # Changed from str = ""
    recommendations: List[str] = []

In [3]:
def prepare_soil_dataframe(soil_data: Dict[str, Any]) -> pd.DataFrame:
    """Convert soil data dictionary to DataFrame with proper column names"""
    # Map API field names to expected DataFrame column names
    column_mapping = {
        'simplified_texture': 'Simplified Texture',
        'ph': 'pH',
        'n': 'N',
        'p': 'P', 
        'k': 'K',
        'o': 'O',
        'ca': 'Ca',
        'mg': 'Mg',
        'cu': 'Cu',
        'fe': 'Fe',
        'zn': 'Zn',
    }
    
    # Create DataFrame with proper column names
    mapped_data = {column_mapping.get(k, k): v for k, v in soil_data.items()}
    df = pd.DataFrame([mapped_data])
    
    return df

In [4]:
def predict_fertility_node(state: WorkflowState) -> WorkflowState:
    """Predict soil fertility status"""
    try:
        # Load the preprocessor model
        fertility_model = joblib.load("/Users/oscar/Desktop/data-project/Fertiliser_Modelling/models/Soil_Status_randomForest_Classifier_Model.joblib")        
        # Prepare input data
        df = prepare_soil_dataframe(state.soil_data)

        fertility_preprocessor = SoilDataPreprocessor()
        
        # Apply preprocessing if preprocessor is available
        if fertility_preprocessor is not None:
            df_processed = fertility_preprocessor.fit_transform(df)
        else:
            # Basic preprocessing fallback
            logger.warning("Fertility preprocessor not available, using basic preprocessing")
        
        # Extract features (exclude non-feature columns)
        feature_columns = ['Simplified Texture', 'pH', 'N', 'P', 'K', 'O', 'Ca', 'Mg', 'Cu', 'Fe', 'Zn']
        features = df_processed[feature_columns].values
        
        # Make prediction
        fertility_pred = fertility_model.predict(features)
        fertility_proba = fertility_model.predict_proba(features)
        
        # Map numeric predictions to status classes
        fertility_status_map = {
            0: "MODERATELY HEALTHY",
            1: "POOR",
            2: "VERY POOR"
        }
        
        # Convert numeric prediction to status class
        fertility_status = fertility_status_map.get(fertility_pred[0], "UNKNOWN")
        
        state.fertility_prediction = fertility_status
        state.fertility_confidence = float(np.max(fertility_proba))
        
        logger.info(f"Fertility prediction: {state.fertility_prediction} (confidence: {state.fertility_confidence:.2f})")
        return state
        
    except Exception as e:
        logger.error(f"Error in fertility prediction: {e}")
        return state

In [5]:
# Create sample test data
test_soil_data = {
    "Simplified Texture": "Loamy",
    "pH": 6.8,
    "N": 45.5,  # Nitrogen in mg/kg
    "P": 35.2,  # Phosphorus in mg/kg
    "K": 180.0, # Potassium in mg/kg
    "O": 2.5,   # Organic matter %
    "Ca": 12.50, # Calcium in mg/kg
    "Mg": 220,  # Magnesium in mg/kg
    "Cu": 1.8,  # Copper in mg/kg
    "Fe": 45.0, # Iron in mg/kg
    "Zn": 2.2   # Zinc in mg/kg
}

# Create a test state
test_state = WorkflowState(soil_data=test_soil_data)

# Run the prediction
result_state = predict_fertility_node(test_state)

# Display results
print(f"\nFertility Prediction: {result_state.fertility_prediction}")
print(f"Confidence: {result_state.fertility_confidence:.2f}")

# Optional: Display the input data as a DataFrame for visualization
print("\nInput Data:")
print(pd.DataFrame([test_soil_data]).to_string())

INFO:__main__:Fertility prediction: MODERATELY HEALTHY (confidence: 0.98)



Fertility Prediction: MODERATELY HEALTHY
Confidence: 0.98

Input Data:
  Simplified Texture   pH     N     P      K    O    Ca   Mg   Cu    Fe   Zn
0              Loamy  6.8  45.5  35.2  180.0  2.5  12.5  220  1.8  45.0  2.2


In [6]:
def predict_fertilizer_node(state: WorkflowState) -> WorkflowState:
    """Predict fertilizer recommendation"""
    try:
        # Load the preprocessor and model
        fertilizer_preprocessor = SoilDataPreprocessor()
        fertilizer_model = joblib.load("/Users/oscar/Desktop/data-project/Fertiliser_Modelling/models/Fertilizers_xgb_Classifier_Model.joblib")
        
        # Prepare input data including fertility prediction
        df = prepare_soil_dataframe(state.soil_data)
        
        # Validate fertility prediction
        if state.fertility_prediction is None:
            raise ValueError("Fertility prediction is required for fertilizer recommendation")
            
        df['Fertility Status'] = state.fertility_prediction
        
        # Apply preprocessing if preprocessor is available
        if fertilizer_preprocessor is not None:
            df_processed = fertilizer_preprocessor.fit_transform(df)
        else:
            # Basic preprocessing fallback
            logger.warning("Fertilizer preprocessor not available, using basic preprocessing")
        
        # Extract features
        feature_columns = ['Simplified Texture', 'pH', 'N', 'P', 'K', 'O', 'Ca', 'Mg', 'Cu', 'Fe', 'Zn', 'Fertility Status']
        features = df_processed[feature_columns].values
        
        # Make prediction
        fertilizer_pred = fertilizer_model.predict(features)
        fertilizer_proba = fertilizer_model.predict_proba(features)
        
        # Map numeric predictions to fertilizer types
        fertilizer_type_map = {
            0: "NPK",
            1: "TSP"
        }
        
        # Convert numeric prediction to fertilizer type
        fertilizer_type = fertilizer_type_map.get(fertilizer_pred[0], "UNKNOWN")
        
        state.fertilizer_prediction = fertilizer_type
        state.fertilizer_confidence = float(np.max(fertilizer_proba))
        
        logger.info(f"Fertilizer prediction: {state.fertilizer_prediction} (confidence: {state.fertilizer_confidence:.2f})")
        return state
        
    except Exception as e:
        logger.error(f"Error in fertilizer prediction: {e}")
        return state

In [7]:
# Create sample test data for fertilizer prediction
test_soil_data = {
    "simplified_texture": "Loamy",  # Using lowercase keys to match mapping
    "ph": 6.8,
    "n": 45.5,  # Nitrogen in mg/kg
    "p": 35.2,  # Phosphorus in mg/kg
    "k": 180.0, # Potassium in mg/kg
    "o": 2.5,   # Organic matter %
    "ca": 1250, # Calcium in mg/kg
    "mg": 220,  # Magnesium in mg/kg
    "cu": 1.8,  # Copper in mg/kg
    "fe": 45.0, # Iron in mg/kg
    "zn": 2.2   # Zinc in mg/kg
}

# Create a test state with both soil data and fertility prediction
test_state = WorkflowState(
    soil_data=test_soil_data,
    fertility_prediction="MODERATELY HEALTHY",  # Add mock fertility prediction
    fertility_confidence=0.89
)

# Run both predictions in sequence
fertility_result = predict_fertility_node(test_state)
fertilizer_result = predict_fertilizer_node(fertility_result)

# Display results
print("\nTest Results:")
print(f"Fertility Prediction: {fertility_result.fertility_prediction}")
print(f"Fertility Confidence: {fertility_result.fertility_confidence:.2f}")

print(f"\nFertilizer Prediction: {fertilizer_result.fertilizer_prediction}")
print(f"Fertilizer Confidence: {fertilizer_result.fertilizer_confidence:.2f}")

# Display the input data
print("\nInput Data:")
print(pd.DataFrame([test_soil_data]).to_string())

INFO:__main__:Fertility prediction: MODERATELY HEALTHY (confidence: 0.98)
INFO:__main__:Fertilizer prediction: TSP (confidence: 0.91)



Test Results:
Fertility Prediction: MODERATELY HEALTHY
Fertility Confidence: 0.98

Fertilizer Prediction: TSP
Fertilizer Confidence: 0.91

Input Data:
  simplified_texture   ph     n     p      k    o    ca   mg   cu    fe   zn
0              Loamy  6.8  45.5  35.2  180.0  2.5  1250  220  1.8  45.0  2.2


In [13]:
def generate_explanation_node(state: WorkflowState) -> WorkflowState:
    """Generate AI explanation and recommendations"""
    try:
        if llm is None:
            # Fallback explanation when LLM is not available
            state.explanation = f"Your soil shows {state.fertility_prediction.lower()} fertility status with {state.fertility_confidence:.1%} confidence. The recommended fertilizer {state.fertilizer_prediction} will help improve nutrient availability for your {state.soil_data['crop_type']} crop based on the current nutrient levels (N: {state.soil_data['n']}, P: {state.soil_data['p']}, K: {state.soil_data['k']}) and pH: {state.soil_data['ph']}."
            state.recommendations = [
                f"Apply {state.fertilizer_prediction} according to package instructions",
                "Monitor soil pH and adjust if needed (optimal range: 6.0-7.0)",
                "Maintain proper soil moisture levels for optimal nutrient uptake",
                f"Consider adding organic matter to improve {state.soil_data['simplified_texture'].lower()} soil structure",
                "Test soil nutrients again after 3-4 months to track improvement"
            ]
            return state
        
        # Create prompt for the LLM
        system_prompt = """You are an agricultural expert AI assistant. Your job is to explain soil analysis results and fertilizer recommendations in simple, farmer-friendly language. Provide practical advice and actionable recommendations."""
        
        human_prompt = f"""
        Based on the following soil analysis and predictions, provide a clear explanation and practical recommendations:
        
        Soil Data:
        - Soil Texture: {state.soil_data['simplified_texture']}
        - pH: {state.soil_data['ph']}
        - Nitrogen (N): {state.soil_data['n']}
        - Phosphorus (P): {state.soil_data['p']}
        - Potassium (K): {state.soil_data['k']}
        - Organic Content (O): {state.soil_data['o']}
        - Calcium (Ca): {state.soil_data['ca']}
        - Magnesium (Mg): {state.soil_data['mg']}
        - Copper (Cu): {state.soil_data['cu']}
        - Iron (Fe): {state.soil_data['fe']}
        - Zinc (Zn): {state.soil_data['zn']}
        
        Predictions:
        - Soil Fertility Status: {state.fertility_prediction} (Confidence: {state.fertility_confidence:.1%})
        - Recommended Fertilizer: {state.fertilizer_prediction} (Confidence: {state.fertilizer_confidence:.1%})
        
        Please provide:
        1. A simple explanation of what these results mean for the farmer
        2. Why this fertilizer was recommended based on the soil's nutrient profile
        3. 3-5 specific actionable recommendations for improving soil health and crop yield
        
        Keep the language simple and practical for farmers.
        """
        
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt)
        ]
        
        response = llm.invoke(messages)
        full_response = response.content
        
        # Parse the response to extract explanation and recommendations
        lines = full_response.split('\n')
        explanation_lines = []
        recommendations = []
        
        in_recommendations = False
        for line in lines:
            line = line.strip()
            if not line:
                continue
                
            if 'recommendation' in line.lower() or line.lower().startswith(('1.', '2.', '3.', '4.', '5.', '-', '•')):
                in_recommendations = True
                if line.lower().startswith(('1.', '2.', '3.', '4.', '5.')):
                    recommendations.append(line)
                elif line.startswith(('-', '•')):
                    recommendations.append(line[1:].strip())
                elif not 'recommendation' in line.lower():
                    recommendations.append(line)
            elif not in_recommendations:
                explanation_lines.append(line)
        
        state.explanation = ' '.join(explanation_lines) if explanation_lines else full_response
        state.recommendations = recommendations if recommendations else [
            "Monitor soil moisture regularly",
            "Test soil pH monthly",
            "Apply organic matter to improve soil structure",
            "Follow recommended fertilizer application rates",
            "Consider crop rotation for soil health"
        ]
        
        logger.info("AI explanation generated successfully")
        return state
        
    except Exception as e:
        logger.error(f"Error generating explanation: {e}")
        # Provide fallback explanation
        state.explanation = f"Your soil shows {state.fertility_prediction.lower()} fertility status. The recommended fertilizer {state.fertilizer_prediction} will help improve nutrient availability for your {state.soil_data['crop_type']} crop."
        state.recommendations = [
            "Apply the recommended fertilizer according to package instructions",
            "Monitor soil pH and adjust if needed",
            "Maintain proper soil moisture levels",
            "Consider adding organic matter to improve soil health"
        ]
        return state

In [15]:
# Initialize the LLM (make sure you have your OpenAI API key set)
load_dotenv()  # Load API key from .env file
llm = ChatOpenAI(
    temperature=0.7,
    model="gpt-4o-mini"
)

# Create comprehensive test data
test_soil_data = {
    "simplified_texture": "Loamy",
    "ph": 6.8,
    "n": 45.5,    # Nitrogen in mg/kg
    "p": 35.2,    # Phosphorus in mg/kg
    "k": 180.0,   # Potassium in mg/kg
    "o": 2.5,     # Organic matter %
    "ca": 1250,   # Calcium in mg/kg
    "mg": 220,    # Magnesium in mg/kg
    "cu": 1.8,    # Copper in mg/kg
    "fe": 45.0,   # Iron in mg/kg
    "zn": 2.2,    # Zinc in mg/kg
    "crop_type": "Maize"  # Added crop type for explanation
}

# Create test state with all required predictions
test_state = WorkflowState(
    soil_data=test_soil_data,
    fertility_prediction="High",
    fertility_confidence=0.85,
    fertilizer_prediction="NPK",
    fertilizer_confidence=0.92
)

# Run the explanation node
result_state = generate_explanation_node(test_state)

# Display results
print("=== Soil Analysis Results ===")
print(f"\nSoil Fertility: {result_state.fertility_prediction} (Confidence: {result_state.fertility_confidence:.1%})")
print(f"Recommended Fertilizer: {result_state.fertilizer_prediction} (Confidence: {result_state.fertilizer_confidence:.1%})")

print("\n=== AI Explanation ===")
print(result_state.explanation)

print("\n=== Recommendations ===")
for i, rec in enumerate(result_state.recommendations, 1):
    print(f"{i}. {rec}")


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:AI explanation generated successfully


=== Soil Analysis Results ===

Soil Fertility: High (Confidence: 85.0%)
Recommended Fertilizer: NPK (Confidence: 92.0%)

=== AI Explanation ===
### Explanation of Soil Analysis Results

=== Recommendations ===
1. 1. **Soil Texture and pH**: Your soil is loamy, which means it has a good balance of sand, silt, and clay. This texture is excellent for growing crops because it holds nutrients and water well. The pH level of 6.8 is slightly acidic to neutral, which is ideal for most crops since it allows them to access nutrients effectively.
2. 2. **Nutrient Levels**:
3. **Nitrogen (N)**: 45.5 (high) - This is essential for plant growth and helps with leaf development.
4. **Phosphorus (P)**: 35.2 (high) - Important for root development and flowering.
5. **Potassium (K)**: 180.0 (high) - Supports overall plant health, helps with drought resistance, and improves fruit quality.
6. **Organic Content (O)**: 2.5% (moderate) - This is good, but increasing it can help improve soil structure and nutr