In [2]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from typing import Dict, Any, List, Optional
import joblib
import numpy as np
import pandas as pd
from langgraph.graph import StateGraph, START, END
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
import os
from datetime import datetime
import logging
from dotenv import load_dotenv

# Import your preprocessing functions
from preprocessing import SoilDataPreprocessor
from schema import WorkflowState

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [7]:
import os
import sys
from pathlib import Path

#sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

# Get the project root directory
project_root = str(Path().absolute().parent)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from models import ModelLoader

loader = ModelLoader()

In [9]:
# Load fertility and fertilizer models
fertility_model = loader.load_model('Soil_Status_randomForest_Classifier_Model.joblib')
fertilizer_model = loader.load_model('Fertilizers_xgb_Classifier_Model.joblib')

# Define mappings for predictions
# 1. Fertility status mapping
FERTILITY_STATUS_MAP = {0: "MODERATELY HEALTHY", 1: "POOR", 2: "VERY POOR"}
# 2. Fertilizer type mapping
FERTILIZER_TYPE_MAP = {0: "NPK", 1: "TSP"}

COLUMN_MAPPING = {
    'simplified_texture': 'simpliedtexture(1)',
    'ph': 'ph', 'n': 'n', 'p': 'p', 'k': 'k', 'o': 'o',
    'ca': 'ca', 'mg': 'mg', 'cu': 'cu', 'fe': 'fe', 'zn': 'zn'
}

FEATURE_COLUMNS = ['simpliedtexture(1)', 'ph', 'n', 'p', 'k', 'o', 'ca', 'mg', 'cu', 'fe', 'zn']

In [10]:
def prepare_soil_dataframe(soil_data: Dict[str, Any]) -> pd.DataFrame:
    """Convert soil data dictionary to DataFrame with proper column names"""
    logger.debug(f"Incoming soil_data: {soil_data}")
    
    mapped_data = {COLUMN_MAPPING.get(k, k): v for k, v in soil_data.items()}
    logger.debug(f"Mapped data: {mapped_data}")
    
    df = pd.DataFrame([mapped_data])
    logger.debug(f"Created DataFrame with shape: {df.shape}")
    logger.debug(f"DataFrame columns: {df.columns.tolist()}")
    logger.debug(f"DataFrame values:\n{df.to_string()}")
    
    return df

def load_model_and_predict(model, df: pd.DataFrame, status_map: Dict[int, str]) -> tuple:
    """Generic function to load model and make predictions"""
    logger.debug(f"Input DataFrame for prediction:\n{df.to_string()}")
    logger.debug(f"DataFrame dtypes:\n{df.dtypes}")
    
    prediction = model.predict(df)
    probabilities = model.predict_proba(df)
    
    logger.debug(f"Raw prediction: {prediction}")
    logger.debug(f"Prediction probabilities: {probabilities}")
    
    status = status_map.get(prediction[0], "UNKNOWN")
    confidence = float(np.max(probabilities))
    
    logger.debug(f"Mapped status: {status}")
    logger.debug(f"Confidence: {confidence}")
    
    return status, confidence

In [11]:
def predict_fertility_node(state: WorkflowState) -> WorkflowState:
    """Predict soil fertility status"""
    logger.info("Starting fertility prediction...")
    
    try:
        df = prepare_soil_dataframe(state["soil_data"])
        logger.debug(f"Original DataFrame for fertility prediction:\n{df.to_string()}")
        
        # Apply preprocessing
        logger.debug("Applying preprocessing...")
        preprocessor = SoilDataPreprocessor()
        df_processed = preprocessor.fit_transform(df)
        
        logger.debug(f"Processed DataFrame shape: {df_processed.shape}")
        logger.debug(f"Processed DataFrame columns: {df_processed.columns.tolist()}")
        logger.debug(f"Processed DataFrame:\n{df_processed.to_string()}")
        
        # Check if all required columns are present
        missing_columns = [col for col in FEATURE_COLUMNS if col not in df_processed.columns]
        if missing_columns:
            logger.error(f"Missing required columns for fertility prediction: {missing_columns}")
            raise ValueError(f"Missing required columns: {missing_columns}")
            
        # Select feature columns for prediction
        df_for_prediction = df_processed[FEATURE_COLUMNS].copy()
        logger.debug(f"Final prediction DataFrame shape: {df_for_prediction.shape}")
        logger.debug(f"Final prediction DataFrame:\n{df_for_prediction.to_string()}")
        
        # Make prediction
        fertility_status, fertility_confidence = load_model_and_predict(
            fertility_model, df_for_prediction, FERTILITY_STATUS_MAP
        )
        
        state["fertility_prediction"] = fertility_status
        state["fertility_confidence"] = fertility_confidence
        
        logger.info(f"Fertility prediction completed: {fertility_status} (confidence: {fertility_confidence:.2f})")
        return state
    
    except Exception as e:
        logger.error(f"Error in fertility prediction: {e}")
        logger.error(f"Exception details:", exc_info=True)
        state["fertility_prediction"] = "UNKNOWN"
        state["fertility_confidence"] = 0.0
        return state

In [14]:
# Create sample test data
test_soil_data = {
    "simplified_texture": "LOAMY SOIL",
    "ph": 5.3,
    "n": 0.15,
    "p": 29.2,
    "k": 1.07,
    "o": 0.9,
    "ca": 1.2,
    "mg": 1.2,
    "cu": 1.5,
    "fe": 12.2,
    "zn": 5.4
}

# Create a test state
test_state = WorkflowState(soil_data=test_soil_data)

# Run the prediction
result_state = predict_fertility_node(test_state)

# Display results
print(f"\nFertility Prediction: {result_state['fertility_prediction']}")
print(f"Confidence: {result_state['fertility_confidence']:.2f}")

# Optional: Display the input data as a DataFrame for visualization
print("\nInput Data:")
print(pd.DataFrame([test_soil_data]).to_string())

INFO:__main__:Starting fertility prediction...
2025-05-28 13:20:03,924 - SoilDataPreprocessor - INFO - SoilDataPreprocessor initialized
INFO:SoilDataPreprocessor:SoilDataPreprocessor initialized
2025-05-28 13:20:03,931 - SoilDataPreprocessor - INFO - Starting fit_transform process
INFO:SoilDataPreprocessor:Starting fit_transform process
2025-05-28 13:20:03,934 - SoilDataPreprocessor - INFO - Parameters - encoding: label, scaling: standard, SMOTE: False
INFO:SoilDataPreprocessor:Parameters - encoding: label, scaling: standard, SMOTE: False
2025-05-28 13:20:03,937 - SoilDataPreprocessor - INFO - Input DataFrame shape: (1, 11)
INFO:SoilDataPreprocessor:Input DataFrame shape: (1, 11)
2025-05-28 13:20:03,943 - SoilDataPreprocessor - INFO - Target column: None
INFO:SoilDataPreprocessor:Target column: None
2025-05-28 13:20:03,948 - SoilDataPreprocessor - INFO - ----------------------------------------


INFO:SoilDataPreprocessor:----------------------------------------
2025-05-28 13:20:03,954 - SoilDataPreprocessor - INFO - STEP 1: Categorical Encoding
INFO:SoilDataPreprocessor:STEP 1: Categorical Encoding
2025-05-28 13:20:03,961 - SoilDataPreprocessor - INFO - Starting categorical encoding with type: label
INFO:SoilDataPreprocessor:Starting categorical encoding with type: label
2025-05-28 13:20:03,981 - SoilDataPreprocessor - INFO - Found 1 categorical columns: ['simpliedtexture(1)']
INFO:SoilDataPreprocessor:Found 1 categorical columns: ['simpliedtexture(1)']
2025-05-28 13:20:04,023 - SoilDataPreprocessor - INFO - Fitted LabelEncoder for simpliedtexture(1), classes: 1
INFO:SoilDataPreprocessor:Fitted LabelEncoder for simpliedtexture(1), classes: 1
2025-05-28 13:20:04,033 - SoilDataPreprocessor - INFO - Categorical encoding completed. Output shape: (1, 11)
INFO:SoilDataPreprocessor:Categorical encoding completed. Output shape: (1, 11)
2025-05-28 13:20:04,043 - SoilDataPreprocessor - 


Fertility Prediction: MODERATELY HEALTHY
Confidence: 0.98

Input Data:
  simplified_texture   ph     n     p     k    o   ca   mg   cu    fe   zn
0         LOAMY SOIL  5.3  0.15  29.2  1.07  0.9  1.2  1.2  1.5  12.2  5.4


In [6]:
def predict_fertilizer_node(state: WorkflowState) -> WorkflowState:
    """Predict fertilizer recommendation"""
    try:
        # Load the preprocessor and model
        fertilizer_preprocessor = SoilDataPreprocessor()
        fertilizer_model = joblib.load("/Users/oscar/Desktop/data-project/Fertiliser_Modelling/models/Fertilizers_xgb_Classifier_Model.joblib")
        
        # Prepare input data including fertility prediction
        df = prepare_soil_dataframe(state.soil_data)
        
        # Validate fertility prediction
        if state.fertility_prediction is None:
            raise ValueError("Fertility prediction is required for fertilizer recommendation")
            
        df['Fertility Status'] = state.fertility_prediction
        
        # Apply preprocessing if preprocessor is available
        if fertilizer_preprocessor is not None:
            df_processed = fertilizer_preprocessor.fit_transform(df)
        else:
            # Basic preprocessing fallback
            logger.warning("Fertilizer preprocessor not available, using basic preprocessing")
        
        # Extract features
        feature_columns = ['Simplified Texture', 'pH', 'N', 'P', 'K', 'O', 'Ca', 'Mg', 'Cu', 'Fe', 'Zn', 'Fertility Status']
        features = df_processed[feature_columns].values
        
        # Make prediction
        fertilizer_pred = fertilizer_model.predict(features)
        fertilizer_proba = fertilizer_model.predict_proba(features)
        
        # Map numeric predictions to fertilizer types
        fertilizer_type_map = {
            0: "NPK",
            1: "TSP"
        }
        
        # Convert numeric prediction to fertilizer type
        fertilizer_type = fertilizer_type_map.get(fertilizer_pred[0], "UNKNOWN")
        
        state.fertilizer_prediction = fertilizer_type
        state.fertilizer_confidence = float(np.max(fertilizer_proba))
        
        logger.info(f"Fertilizer prediction: {state.fertilizer_prediction} (confidence: {state.fertilizer_confidence:.2f})")
        return state
        
    except Exception as e:
        logger.error(f"Error in fertilizer prediction: {e}")
        return state

In [7]:
# Create sample test data for fertilizer prediction
test_soil_data = {
    "simplified_texture": "Loamy",  # Using lowercase keys to match mapping
    "ph": 6.8,
    "n": 45.5,  # Nitrogen in mg/kg
    "p": 35.2,  # Phosphorus in mg/kg
    "k": 180.0, # Potassium in mg/kg
    "o": 2.5,   # Organic matter %
    "ca": 1250, # Calcium in mg/kg
    "mg": 220,  # Magnesium in mg/kg
    "cu": 1.8,  # Copper in mg/kg
    "fe": 45.0, # Iron in mg/kg
    "zn": 2.2   # Zinc in mg/kg
}

# Create a test state with both soil data and fertility prediction
test_state = WorkflowState(
    soil_data=test_soil_data,
    fertility_prediction="MODERATELY HEALTHY",  # Add mock fertility prediction
    fertility_confidence=0.89
)

# Run both predictions in sequence
fertility_result = predict_fertility_node(test_state)
fertilizer_result = predict_fertilizer_node(fertility_result)

# Display results
print("\nTest Results:")
print(f"Fertility Prediction: {fertility_result.fertility_prediction}")
print(f"Fertility Confidence: {fertility_result.fertility_confidence:.2f}")

print(f"\nFertilizer Prediction: {fertilizer_result.fertilizer_prediction}")
print(f"Fertilizer Confidence: {fertilizer_result.fertilizer_confidence:.2f}")

# Display the input data
print("\nInput Data:")
print(pd.DataFrame([test_soil_data]).to_string())

INFO:__main__:Fertility prediction: MODERATELY HEALTHY (confidence: 0.98)
INFO:__main__:Fertilizer prediction: TSP (confidence: 0.91)



Test Results:
Fertility Prediction: MODERATELY HEALTHY
Fertility Confidence: 0.98

Fertilizer Prediction: TSP
Fertilizer Confidence: 0.91

Input Data:
  simplified_texture   ph     n     p      k    o    ca   mg   cu    fe   zn
0              Loamy  6.8  45.5  35.2  180.0  2.5  1250  220  1.8  45.0  2.2


In [13]:
def generate_explanation_node(state: WorkflowState) -> WorkflowState:
    """Generate AI explanation and recommendations"""
    try:
        if llm is None:
            # Fallback explanation when LLM is not available
            state.explanation = f"Your soil shows {state.fertility_prediction.lower()} fertility status with {state.fertility_confidence:.1%} confidence. The recommended fertilizer {state.fertilizer_prediction} will help improve nutrient availability for your {state.soil_data['crop_type']} crop based on the current nutrient levels (N: {state.soil_data['n']}, P: {state.soil_data['p']}, K: {state.soil_data['k']}) and pH: {state.soil_data['ph']}."
            state.recommendations = [
                f"Apply {state.fertilizer_prediction} according to package instructions",
                "Monitor soil pH and adjust if needed (optimal range: 6.0-7.0)",
                "Maintain proper soil moisture levels for optimal nutrient uptake",
                f"Consider adding organic matter to improve {state.soil_data['simplified_texture'].lower()} soil structure",
                "Test soil nutrients again after 3-4 months to track improvement"
            ]
            return state
        
        # Create prompt for the LLM
        system_prompt = """You are an agricultural expert AI assistant. Your job is to explain soil analysis results and fertilizer recommendations in simple, farmer-friendly language. Provide practical advice and actionable recommendations."""
        
        human_prompt = f"""
        Based on the following soil analysis and predictions, provide a clear explanation and practical recommendations:
        
        Soil Data:
        - Soil Texture: {state.soil_data['simplified_texture']}
        - pH: {state.soil_data['ph']}
        - Nitrogen (N): {state.soil_data['n']}
        - Phosphorus (P): {state.soil_data['p']}
        - Potassium (K): {state.soil_data['k']}
        - Organic Content (O): {state.soil_data['o']}
        - Calcium (Ca): {state.soil_data['ca']}
        - Magnesium (Mg): {state.soil_data['mg']}
        - Copper (Cu): {state.soil_data['cu']}
        - Iron (Fe): {state.soil_data['fe']}
        - Zinc (Zn): {state.soil_data['zn']}
        
        Predictions:
        - Soil Fertility Status: {state.fertility_prediction} (Confidence: {state.fertility_confidence:.1%})
        - Recommended Fertilizer: {state.fertilizer_prediction} (Confidence: {state.fertilizer_confidence:.1%})
        
        Please provide:
        1. A simple explanation of what these results mean for the farmer
        2. Why this fertilizer was recommended based on the soil's nutrient profile
        3. 3-5 specific actionable recommendations for improving soil health and crop yield
        
        Keep the language simple and practical for farmers.
        """
        
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt)
        ]
        
        response = llm.invoke(messages)
        full_response = response.content
        
        # Parse the response to extract explanation and recommendations
        lines = full_response.split('\n')
        explanation_lines = []
        recommendations = []
        
        in_recommendations = False
        for line in lines:
            line = line.strip()
            if not line:
                continue
                
            if 'recommendation' in line.lower() or line.lower().startswith(('1.', '2.', '3.', '4.', '5.', '-', '•')):
                in_recommendations = True
                if line.lower().startswith(('1.', '2.', '3.', '4.', '5.')):
                    recommendations.append(line)
                elif line.startswith(('-', '•')):
                    recommendations.append(line[1:].strip())
                elif not 'recommendation' in line.lower():
                    recommendations.append(line)
            elif not in_recommendations:
                explanation_lines.append(line)
        
        state.explanation = ' '.join(explanation_lines) if explanation_lines else full_response
        state.recommendations = recommendations if recommendations else [
            "Monitor soil moisture regularly",
            "Test soil pH monthly",
            "Apply organic matter to improve soil structure",
            "Follow recommended fertilizer application rates",
            "Consider crop rotation for soil health"
        ]
        
        logger.info("AI explanation generated successfully")
        return state
        
    except Exception as e:
        logger.error(f"Error generating explanation: {e}")
        # Provide fallback explanation
        state.explanation = f"Your soil shows {state.fertility_prediction.lower()} fertility status. The recommended fertilizer {state.fertilizer_prediction} will help improve nutrient availability for your {state.soil_data['crop_type']} crop."
        state.recommendations = [
            "Apply the recommended fertilizer according to package instructions",
            "Monitor soil pH and adjust if needed",
            "Maintain proper soil moisture levels",
            "Consider adding organic matter to improve soil health"
        ]
        return state

In [15]:
# Initialize the LLM (make sure you have your OpenAI API key set)
load_dotenv()  # Load API key from .env file
llm = ChatOpenAI(
    temperature=0.7,
    model="gpt-4o-mini"
)

# Create comprehensive test data
test_soil_data = {
    "simplified_texture": "Loamy",
    "ph": 6.8,
    "n": 45.5,    # Nitrogen in mg/kg
    "p": 35.2,    # Phosphorus in mg/kg
    "k": 180.0,   # Potassium in mg/kg
    "o": 2.5,     # Organic matter %
    "ca": 1250,   # Calcium in mg/kg
    "mg": 220,    # Magnesium in mg/kg
    "cu": 1.8,    # Copper in mg/kg
    "fe": 45.0,   # Iron in mg/kg
    "zn": 2.2,    # Zinc in mg/kg
    "crop_type": "Maize"  # Added crop type for explanation
}

# Create test state with all required predictions
test_state = WorkflowState(
    soil_data=test_soil_data,
    fertility_prediction="High",
    fertility_confidence=0.85,
    fertilizer_prediction="NPK",
    fertilizer_confidence=0.92
)

# Run the explanation node
result_state = generate_explanation_node(test_state)

# Display results
print("=== Soil Analysis Results ===")
print(f"\nSoil Fertility: {result_state.fertility_prediction} (Confidence: {result_state.fertility_confidence:.1%})")
print(f"Recommended Fertilizer: {result_state.fertilizer_prediction} (Confidence: {result_state.fertilizer_confidence:.1%})")

print("\n=== AI Explanation ===")
print(result_state.explanation)

print("\n=== Recommendations ===")
for i, rec in enumerate(result_state.recommendations, 1):
    print(f"{i}. {rec}")


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:AI explanation generated successfully


=== Soil Analysis Results ===

Soil Fertility: High (Confidence: 85.0%)
Recommended Fertilizer: NPK (Confidence: 92.0%)

=== AI Explanation ===
### Explanation of Soil Analysis Results

=== Recommendations ===
1. 1. **Soil Texture and pH**: Your soil is loamy, which means it has a good balance of sand, silt, and clay. This texture is excellent for growing crops because it holds nutrients and water well. The pH level of 6.8 is slightly acidic to neutral, which is ideal for most crops since it allows them to access nutrients effectively.
2. 2. **Nutrient Levels**:
3. **Nitrogen (N)**: 45.5 (high) - This is essential for plant growth and helps with leaf development.
4. **Phosphorus (P)**: 35.2 (high) - Important for root development and flowering.
5. **Potassium (K)**: 180.0 (high) - Supports overall plant health, helps with drought resistance, and improves fruit quality.
6. **Organic Content (O)**: 2.5% (moderate) - This is good, but increasing it can help improve soil structure and nutr