# Crop Prediction Pipeline

This notebook consumes JSON files from the environmental data retriever and uses the ML model to predict the best crop for the given conditions.

## 1. Import Required Libraries

In [2]:
import json
import os
import sys
import glob
import joblib
import pandas as pd
import numpy as np
from datetime import datetime
from typing import Dict, Any, List, Optional

# Add the `advisor_lib` directory (a sibling of the current working directory) to sys.path so its modules can be imported
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'advisor_lib'))

print("✅ All required libraries imported successfully!")

✅ All required libraries imported successfully!


## 2. JSON Data Loader Functions

In [3]:
def load_latest_ml_data(data_dir: str = "data_output") -> Optional[Dict[str, Any]]:
    """
    Load the most recently modified ML-ready JSON data file.

    Searches ``data_dir`` for files matching ``ml_ready_data_*.json`` and
    parses the one with the newest modification time.

    Args:
        data_dir (str): Directory containing JSON data files

    Returns:
        Optional[Dict[str, Any]]: Parsed JSON content, or None if no file
        matched or the chosen file could not be read or parsed (the reason
        is printed in both cases).
    """
    try:
        # Find all ML-ready data files
        pattern = os.path.join(data_dir, "ml_ready_data_*.json")
        files = glob.glob(pattern)

        if not files:
            print(f"❌ No ML-ready data files found in {data_dir}")
            return None

        # Rank by modification time. getctime is not used because it means
        # "last metadata change" on Unix but "creation time" on Windows, so
        # the file it selects depends on the platform.
        latest_file = max(files, key=os.path.getmtime)

        # Read as UTF-8 explicitly instead of relying on the locale default,
        # so non-ASCII content decodes the same way on every machine.
        with open(latest_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        print(f"✅ Loaded ML data from: {latest_file}")
        return data

    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with
        # None so the calling pipeline can return an error result.
        print(f"❌ Error loading ML data: {e}")
        return None


def load_environmental_data(data_dir: str = "data_output") -> Optional[Dict[str, Any]]:
    """
    Load the most recently modified comprehensive environmental data file.

    Searches ``data_dir`` for files matching ``environmental_data_*.json``
    and parses the one with the newest modification time.

    Args:
        data_dir (str): Directory containing JSON data files

    Returns:
        Optional[Dict[str, Any]]: Parsed JSON content, or None if no file
        matched or the chosen file could not be read or parsed (the reason
        is printed in both cases).
    """
    try:
        pattern = os.path.join(data_dir, "environmental_data_*.json")
        files = glob.glob(pattern)

        if not files:
            print(f"❌ No environmental data files found in {data_dir}")
            return None

        # Rank by modification time. getctime is not used because it means
        # "last metadata change" on Unix but "creation time" on Windows, so
        # the file it selects depends on the platform.
        latest_file = max(files, key=os.path.getmtime)

        # Read as UTF-8 explicitly instead of relying on the locale default,
        # so non-ASCII content decodes the same way on every machine.
        with open(latest_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        print(f"✅ Loaded environmental data from: {latest_file}")
        return data

    except Exception as e:
        # Deliberate best-effort: environmental context is optional for the
        # caller, so report the problem and return None instead of raising.
        print(f"❌ Error loading environmental data: {e}")
        return None


def prepare_features_for_model(ml_data: Dict[str, Any]) -> List[float]:
    """
    Prepare features in the correct order for the ML model
    Expected order: [N, P, K, temperature, humidity, ph, rainfall]

    A key that is absent, or present with a ``None`` value (a JSON ``null``),
    is replaced by its fallback value listed below. Every value is converted
    with ``float()``.

    Args:
        ml_data (Dict[str, Any]): ML-ready data dictionary

    Returns:
        List[float]: Feature vector for ML model (always seven floats)

    Raises:
        ValueError: If a supplied value cannot be converted to float
            (for example a non-numeric string).
    """
    # (key, fallback) pairs, listed in the exact order the model expects.
    feature_defaults = (
        ("N", 70),            # Nitrogen
        ("P", 35),            # Phosphorus
        ("K", 35),            # Potassium
        ("temperature", 25),  # Temperature
        ("humidity", 65),     # Humidity
        ("ph", 6.5),          # pH
        ("rainfall", 25),     # Rainfall
    )

    features = []
    for key, fallback in feature_defaults:
        value = ml_data.get(key)
        # Only None triggers the fallback; legitimate zeros (e.g. no rainfall)
        # must pass through unchanged.
        features.append(float(fallback if value is None else value))
    return features

print("✅ JSON data loader functions defined successfully!")

✅ JSON data loader functions defined successfully!


## 3. ML Model Loading and Prediction Functions

In [4]:
def load_crop_recommendation_model(model_path: str = "../advisor_lib/crop_recommendation_model.pkl") -> Optional[Any]:
    """
    Load the pre-trained crop recommendation model.

    Tries ``model_path`` first; if that fails for any reason, tries a
    hard-coded fallback location. Every attempt prints its outcome.

    Args:
        model_path (str): Path to the model file

    Returns:
        Optional[Any]: Loaded model, or None if both locations failed
    """
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code — only point this at model files from a trusted source.
    candidate_paths = (
        model_path,
        "../half-baked-stuff/crop_recommendation_model.pkl",  # alternative path
    )

    for candidate in candidate_paths:
        try:
            loaded_model = joblib.load(candidate)
            print(f"✅ Crop recommendation model loaded from {candidate}")
            return loaded_model
        except Exception as load_error:
            # Report and move on to the next candidate (if any).
            print(f"❌ Error loading model from {candidate}: {load_error}")

    return None


def predict_crop_from_json(ml_data: Dict[str, Any], model: Any) -> Optional[str]:
    """
    Predict the best crop using ML model and JSON data.

    Builds the feature vector with ``prepare_features_for_model``, prints the
    inputs, calls ``model.predict`` on a single-row batch and prints the
    resulting label.

    Args:
        ml_data (Dict[str, Any]): ML-ready data from JSON
        model: Loaded ML model exposing ``predict``

    Returns:
        Optional[str]: Predicted crop name as a built-in ``str``, or None if
        the model is missing or prediction failed (the reason is printed)
    """
    if model is None:
        print("❌ Model not loaded")
        return None

    try:
        # Prepare features
        features = prepare_features_for_model(ml_data)

        print("🔍 Input features for model:")
        feature_names = ["N", "P", "K", "Temperature", "Humidity", "pH", "Rainfall"]
        for name, value in zip(feature_names, features):
            print(f"   {name}: {value}")

        # Make prediction on a one-row batch and take that row's label
        prediction = model.predict([features])

        # Normalise the label to a plain str: this matches the declared
        # return type and keeps a non-string label (e.g. an integer-encoded
        # class) from failing on .upper() below and being discarded.
        predicted_crop = str(prediction[0])

        print("\n🌾 === CROP PREDICTION RESULT ===")
        print(f"🎯 Recommended Crop: {predicted_crop.upper()}")

        return predicted_crop

    except Exception as e:
        # Best-effort: any failure is reported and signalled with None so the
        # calling pipeline can return an error result instead of crashing.
        print(f"❌ Error making prediction: {e}")
        return None


def get_crop_prediction_probabilities(ml_data: Dict[str, Any], model: Any) -> Optional[Dict[str, float]]:
    """
    Return per-crop prediction probabilities, highest first.

    Args:
        ml_data (Dict[str, Any]): ML-ready data from JSON
        model: Loaded ML model

    Returns:
        Optional[Dict[str, float]]: Mapping of class label to probability in
        descending probability order, or None when the model is missing,
        has no ``predict_proba``, or an error occurred
    """
    if model is None:
        return None

    try:
        feature_vector = prepare_features_for_model(ml_data)

        # Bail out early for models without probability support
        if not hasattr(model, 'predict_proba'):
            print("⚠️ Model does not support probability predictions")
            return None

        # One input row goes in, so only the first row of the output matters
        first_row_probs = model.predict_proba([feature_vector])[0]

        # Pair each class label with its probability as a plain float
        crop_to_prob = {}
        for crop_label, probability in zip(model.classes_, first_row_probs):
            crop_to_prob[crop_label] = float(probability)

        # Highest probability first; dicts keep insertion order
        ranked_pairs = sorted(crop_to_prob.items(), key=lambda pair: pair[1], reverse=True)
        return dict(ranked_pairs)

    except Exception as e:
        print(f"❌ Error getting probabilities: {e}")
        return None

print("✅ ML model and prediction functions defined successfully!")

✅ ML model and prediction functions defined successfully!


## 4. Complete Prediction Pipeline

In [5]:
class CropPredictionPipeline:
    """
    Complete pipeline for crop prediction from JSON data.

    Loads the newest ML-ready and environmental JSON files from ``data_dir``,
    runs the crop model on them and formats the outcome.

    Attributes:
        data_dir: Directory the JSON input files are read from.
        model: Model returned by ``load_crop_recommendation_model`` (None if
            loading failed).
        last_prediction: Results dict of the last successful run, else None.
        last_environmental_data: Environmental data used by the last
            successful run, else None.
    """

    def __init__(self, data_dir: str = "data_output", model_path: Optional[str] = None):
        """
        Initialize the prediction pipeline

        Args:
            data_dir (str): Directory containing JSON data files
            model_path (Optional[str]): Path to ML model file; when omitted
                (or empty) the loader's own default path is used
        """
        self.data_dir = data_dir
        self.model = load_crop_recommendation_model(model_path) if model_path else load_crop_recommendation_model()
        self.last_prediction = None
        self.last_environmental_data = None

    @staticmethod
    def _format_coordinate(value: Any) -> str:
        """Format a coordinate to four decimals, or 'N/A' if it is not numeric."""
        try:
            return f"{float(value):.4f}"
        except (TypeError, ValueError):
            return "N/A"

    def run_prediction_pipeline(self) -> Dict[str, Any]:
        """
        Run the complete prediction pipeline

        Returns:
            Dict[str, Any]: Prediction results with environmental context, or
            a dict with a single ``"error"`` key when the ML data could not
            be loaded or no prediction could be made
        """
        print("🚀 === STARTING CROP PREDICTION PIPELINE ===")
        print()

        # Load ML-ready data
        print("📂 Loading ML-ready data...")
        ml_data = load_latest_ml_data(self.data_dir)
        if not ml_data:
            return {"error": "Failed to load ML data"}

        # Load environmental context (optional: the run continues without it)
        print("📂 Loading environmental context...")
        env_data = load_environmental_data(self.data_dir)

        # Make prediction
        print("\n🤖 Making crop prediction...")
        predicted_crop = predict_crop_from_json(ml_data, self.model)

        if not predicted_crop:
            return {"error": "Failed to make prediction"}

        # Get probabilities if available
        print("\n📊 Getting prediction probabilities...")
        probabilities = get_crop_prediction_probabilities(ml_data, self.model)

        # `or {}` guards against an explicit JSON null, which a plain
        # .get(key, {}) default would pass through as None.
        metadata = ml_data.get("metadata") or {}
        if env_data:
            soil_section = env_data.get("soil_data") or {}
            soil_texture = soil_section.get("texture_type")
        else:
            soil_texture = "Unknown"

        # Compile results
        results = {
            "prediction": {
                "recommended_crop": predicted_crop,
                # NOTE(review): this label only says whether probabilities
                # were available, not how large the top probability is —
                # consider deriving it from the probability values.
                "confidence": "High" if probabilities else "Standard",
                "probabilities": probabilities
            },
            "environmental_conditions": {
                "location": metadata.get("coordinates", {}),
                "timestamp": metadata.get("timestamp"),
                "soil": {
                    "texture": soil_texture,
                    "ph": ml_data.get("ph"),
                    "nitrogen": ml_data.get("N"),
                    "phosphorus": ml_data.get("P"),
                    "potassium": ml_data.get("K")
                },
                "weather": {
                    "temperature": ml_data.get("temperature"),
                    "humidity": ml_data.get("humidity"),
                    "rainfall": ml_data.get("rainfall")
                }
            },
            "model_features": prepare_features_for_model(ml_data)
        }

        # Store for future reference
        self.last_prediction = results
        self.last_environmental_data = env_data

        return results

    def display_prediction_summary(self, results: Dict[str, Any]) -> None:
        """
        Display a formatted summary of prediction results

        Args:
            results (Dict[str, Any]): Prediction results as returned by
                ``run_prediction_pipeline`` (an ``"error"`` dict is printed
                as a single error line)
        """
        if "error" in results:
            print(f"❌ Error: {results['error']}")
            return

        print("\n" + "="*60)
        print("🌾 CROP PREDICTION SUMMARY")
        print("="*60)

        # Main prediction
        crop = results["prediction"]["recommended_crop"]
        print(f"\n🎯 RECOMMENDED CROP: {crop.upper()}")
        print(f"🔍 Confidence Level: {results['prediction']['confidence']}")

        # Location and time
        loc = results["environmental_conditions"]["location"]
        if loc:
            # Format each coordinate separately so a missing or non-numeric
            # value prints as N/A instead of raising on the ':.4f' spec.
            latitude = self._format_coordinate(loc.get('latitude'))
            longitude = self._format_coordinate(loc.get('longitude'))
            print(f"📍 Location: {latitude}, {longitude}")
        print(f"⏰ Analysis Time: {results['environmental_conditions']['timestamp']}")

        # Environmental conditions
        print("\n🌱 ENVIRONMENTAL CONDITIONS:")
        soil = results["environmental_conditions"]["soil"]
        weather = results["environmental_conditions"]["weather"]

        print(f"   Soil Type: {soil['texture']}")
        print(f"   Soil pH: {soil['ph']}")
        print(f"   Temperature: {weather['temperature']}°C")
        print(f"   Humidity: {weather['humidity']}%")
        print(f"   Expected Rainfall: {weather['rainfall']}mm")

        print("\n🧪 SOIL NUTRIENTS:")
        print(f"   Nitrogen (N): {soil['nitrogen']}")
        print(f"   Phosphorus (P): {soil['phosphorus']}")
        print(f"   Potassium (K): {soil['potassium']}")

        # Top predictions if available (at most the first five entries)
        if results["prediction"]["probabilities"]:
            print("\n📊 TOP CROP RECOMMENDATIONS:")
            for i, (crop_name, prob) in enumerate(list(results["prediction"]["probabilities"].items())[:5]):
                print(f"   {i+1}. {crop_name}: {prob:.3f} ({prob*100:.1f}%)")

        print("\n" + "="*60)

print("✅ CropPredictionPipeline class created successfully!")

✅ CropPredictionPipeline class created successfully!


## 5. Execute the Complete Pipeline

In [6]:
# Initialize and run the complete crop prediction pipeline
print("🌾 === CROP PREDICTION PIPELINE EXECUTION ===")
print()

# Create pipeline instance
pipeline = CropPredictionPipeline()

# Run the complete pipeline
prediction_results = pipeline.run_prediction_pipeline()

# Display the results
pipeline.display_prediction_summary(prediction_results)

# Target file for the results; written next to the input data so the output
# location follows the pipeline's configured data directory.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
results_file = os.path.join(pipeline.data_dir, f"prediction_results_{timestamp}.json")

if "error" in prediction_results:
    # A failed run must not leave an error dict behind as a "results" file,
    # and must not be reported as a success.
    print("\n⚠️ Crop prediction pipeline finished with errors; no results were saved.")
else:
    try:
        # ensure_ascii=False writes non-ASCII characters verbatim, so the
        # file encoding is pinned to UTF-8 rather than the locale default.
        with open(results_file, 'w', encoding='utf-8') as f:
            json.dump(prediction_results, f, indent=2, ensure_ascii=False)
        print(f"\n💾 Prediction results saved to: {results_file}")
        print("\n🎉 Crop prediction pipeline completed successfully!")
    except Exception as e:
        # Covers both I/O failures and values json cannot serialise.
        print(f"\n❌ Error saving results: {e}")

🌾 === CROP PREDICTION PIPELINE EXECUTION ===

✅ Crop recommendation model loaded from ../advisor_lib/crop_recommendation_model.pkl
🚀 === STARTING CROP PREDICTION PIPELINE ===

📂 Loading ML-ready data...
✅ Loaded ML data from: data_output/ml_ready_data_20250822_232226.json
📂 Loading environmental context...
✅ Loaded environmental data from: data_output/environmental_data_20250822_232226.json

🤖 Making crop prediction...
🔍 Input features for model:
   N: 80.0
   P: 40.0
   K: 45.0
   Temperature: 25.5
   Humidity: 65.0
   pH: 7.1
   Rainfall: 25.0

🌾 === CROP PREDICTION RESULT ===
🎯 Recommended Crop: MUSKMELON

📊 Getting prediction probabilities...

🌾 CROP PREDICTION SUMMARY

🎯 RECOMMENDED CROP: MUSKMELON
🔍 Confidence Level: High
📍 Location: 20.2575, 76.7858
⏰ Analysis Time: 2025-08-22 23:22:26

🌱 ENVIRONMENTAL CONDITIONS:
   Soil Type: clay
   Soil pH: 7.1
   Temperature: 25.5°C
   Humidity: 65%
   Expected Rainfall: 25.0mm

🧪 SOIL NUTRIENTS:
   Nitrogen (N): 80
   Phosphorus (P): 40
  

