In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load dataset. The file name ends in ".xls", but it is parsed as CSV text here
# (read_csv), not as an Excel workbook.
df = pd.read_csv('Crop_recommendation.csv.xls')

# Features and target: every column except "label" is a model input.
X = df.drop("label", axis=1)

y = df["label"]

# Train-test split (stratified so each crop keeps the same share in both parts)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train RandomForest model
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Save model for reuse
joblib.dump(model, "crop_recommendation_model.pkl")

# Example prediction (replace with soil inputs).
# The model was fitted on a DataFrame, so the sample is wrapped in a DataFrame
# with the same column names: this avoids scikit-learn's "X does not have valid
# feature names" warning and makes a column-count mismatch fail loudly.
# The result gets its own name so the test-set predictions in y_pred survive.
sample = pd.DataFrame([[9, 42, 432, 20.8, 82.0, 60.5, 10]], columns=X.columns)  # example row
sample_pred = model.predict(sample)
print("Recommended Crop:", sample_pred[0])


Accuracy: 0.9954545454545455
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        20
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        20
     coconut       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       0.95      1.00      0.98        20
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        20
       maize       0.95      1.00      0.98        20
       mango       1.00      1.00      1.00        20
   mothbeans       1.00      1.00      1.00        20
    mungbean       1.00      1.00      1.00        20
   muskmelon       1.00      1.00      1.00        20
      orange       1.00      1.00      1.00        2



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import joblib

# Train-and-save cell: repeats the load / split / fit / dump steps of the
# previous cell without the evaluation and sample prediction. Running it
# rebinds the notebook-level names df, X, y, X_train, X_test, y_train, y_test
# and model, and overwrites crop_recommendation_model.pkl.

# 1. Load dataset (parsed as CSV despite the ".xls" suffix; change to
#    read_excel if the file is a real workbook)
df = pd.read_csv("Crop_recommendation.csv.xls")

# 2. Features and labels: everything except "label" is a feature
X = df.drop("label", axis=1)
y = df["label"]

# 3. Train-test split (80/20, stratified on the label, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 4. Train model (same hyper-parameters and seed as the previous cell)
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# 5. Save model to PKL in the current working directory
joblib.dump(model, "crop_recommendation_model.pkl")

print("✅ Model trained and saved as crop_recommendation_model.pkl")


✅ Model trained and saved as crop_recommendation_model.pkl


In [None]:
import os
import math
import time
import json
import requests
from datetime import datetime, timezone
from typing import Dict, Any, List, Optional

# -----------------------------
# Config
# -----------------------------



# API credentials are read from the environment; os.getenv yields None when a
# variable is unset. get_weather() and llm_weekly_plan() check for that and
# raise RuntimeError.
OPENWEATHER_API_KEY = os.getenv("OPENWEATHER_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# OpenWeather One Call 3.0 (daily forecast up to 8 days incl. today)
OWM_ONECALL_URL = "https://api.openweathermap.org/data/3.0/onecall"

# OpenEPI Soil API (SoilGrids-backed). The exact path can vary by release;
# the API reference is published at https://api.openepi.io/soil/redoc
# Common patterns are either a GET with lat/lon or a /summary aggregator.
# get_soil() tries these URLs in order and returns the first HTTP 200 result.
# NOTE(review): neither URL has been confirmed against the Redoc - verify and
# pin the exact route once known.
SOIL_ENDPOINT_CANDIDATES = [
    # 1) Likely direct soil endpoint with query parameters
    "https://api.openepi.io/soil",
    # 2) Aggregated endpoint (agriculture/summary) that can include soil
    "https://api.openepi.io/summary",
]

DEFAULT_UNITS = "metric"  # "metric" => °C, m/s, mm (OWM)
TIMEOUT_SEC = 20  # passed as the requests timeout in get_soil() and get_weather()


# -----------------------------
# Helpers
# -----------------------------
def _mm_from_pop_and_rain(pop: float, rain: Optional[float]) -> float:
    """
    OpenWeather daily 'rain' is already mm. If it's missing, you might estimate by PoP,
    but we keep it conservative: 0 if missing.
    """
    try:
        if rain is None:
            return 0.0
        return float(rain)
    except Exception:
        return 0.0


def _safedot(d: dict, path: List[str], default=None):
    cur = d
    try:
        for p in path:
            if cur is None:
                return default
            cur = cur.get(p)
        return cur if cur is not None else default
    except Exception:
        return default


# -----------------------------
# Soil
# -----------------------------
def get_soil(lat: float, lon: float) -> Dict[str, Any]:
    """
    Fetch soil information for a coordinate from the OpenEPI Soil API.

    Every URL in SOIL_ENDPOINT_CANDIDATES is tried with two query-parameter
    spellings (lat/lon, then latitude/longitude). The first HTTP 200 response
    whose body can be decoded and normalized is returned.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns:
        The dict produced by normalize_soil() on success. If every attempt
        fails, ``{"_error": "Soil API fetch failed: <reason>"}`` where the
        reason is the last exception or the last non-200 HTTP status seen.
        This function does not raise on network or decoding errors.
    """
    headers = {"Accept": "application/json"}
    params_variants = [
        {"lat": lat, "lon": lon},                 # simple form
        {"latitude": lat, "longitude": lon},      # alt naming
    ]

    # Reason reported when no attempt succeeds; overwritten by each failure so
    # the caller sees the most recent cause.
    last_err: Any = "no soil endpoint configured"
    for base_url in SOIL_ENDPOINT_CANDIDATES:
        for q in params_variants:
            try:
                r = requests.get(base_url, params=q, headers=headers, timeout=TIMEOUT_SEC)
                if r.status_code != 200:
                    # Record the status; previously a run of 4xx/5xx answers
                    # ended in the uninformative "fetch failed: None".
                    last_err = f"HTTP {r.status_code} from {base_url}"
                    continue
                return normalize_soil(r.json())
            except Exception as e:
                # Best effort by design: network errors, bad JSON or a payload
                # that cannot be normalized all fall through to the next attempt.
                last_err = e

    # If all attempts fail, return minimal structure with reason
    return {"_error": f"Soil API fetch failed: {last_err}"}


def _soil_number(entry: Any) -> Optional[float]:
    """Convert a soil property entry to float, unwrapping ``{"value": x}``; None if not convertible."""
    if isinstance(entry, dict) and "value" in entry:
        entry = entry["value"]
    try:
        return float(entry)
    except Exception:
        return None


def _soil_fields_for_key(lk: str) -> List[str]:
    """Map a lower-cased payload key to the normalized numeric field names it may feed."""
    fields = []
    # "phos" is excluded so that phosphorus readings are not mistaken for pH.
    if "ph" in lk and "phos" not in lk:
        fields.append("ph")
    if "orgc" in lk or "organic" in lk:
        fields.append("organic_carbon")
    if "sand" in lk:
        fields.append("sand_pct")
    if "silt" in lk:
        fields.append("silt_pct")
    if "clay" in lk:
        fields.append("clay_pct")
    if "bulk" in lk and "density" in lk:
        fields.append("bulk_density")
    if "cec" in lk or "cation" in lk:
        fields.append("cec")
    return fields


def normalize_soil(raw: Dict[str, Any]) -> Dict[str, Any]:
    """
    Pull common soil properties out of a loosely structured API payload.

    The payload itself and its "data", "properties", "soil" and "result"
    sub-dicts are scanned in that order. Keys are matched by substring
    (case-insensitive); the first convertible value found for each field wins.
    Values may be plain numbers/numeric strings or dicts carrying a "value"
    key. Units are passed through unchanged.

    Args:
        raw: Decoded JSON body. A non-dict payload is tolerated and simply
            yields no extracted fields.

    Returns:
        A dict with any of ``ph``, ``organic_carbon``, ``sand_pct``,
        ``silt_pct``, ``clay_pct``, ``bulk_density``, ``cec`` (floats) and
        ``texture_class`` (str) that could be determined, plus
        ``_raw_soil_sample`` holding ``raw`` itself for transparency.
    """
    val: Dict[str, Any] = {}

    # Heuristics for locating data: top level first, then the usual wrappers.
    if isinstance(raw, dict):
        candidates = [raw] + [raw.get(name) for name in ("data", "properties", "soil", "result")]
    else:
        candidates = []

    for c in candidates:
        if not isinstance(c, dict):
            continue
        for k, entry in c.items():
            if not isinstance(k, str):
                continue
            lk = k.lower()

            for field in _soil_fields_for_key(lk):
                if val.get(field) is not None:
                    continue  # an earlier key already supplied this field
                number = _soil_number(entry)
                if number is not None:
                    val[field] = number

            # Texture class if explicitly provided (a later match overrides an earlier one)
            if "texture" in lk and isinstance(entry, str):
                val["texture_class"] = entry

    # If texture class not present, infer a simple class from the fractions.
    fractions = (val.get("sand_pct"), val.get("silt_pct"), val.get("clay_pct"))
    if "texture_class" not in val and all(v is not None for v in fractions):
        sand, silt, clay = fractions
        # very rough heuristic:
        if clay >= 40:
            val["texture_class"] = "Clayey"
        elif sand >= 65:
            val["texture_class"] = "Sandy"
        elif 20 <= clay <= 35 and 30 <= silt <= 50:
            val["texture_class"] = "Loam/Clay-loam"
        else:
            val["texture_class"] = "Loam-like"

    val["_raw_soil_sample"] = raw
    return val


# -----------------------------
# Weather
# -----------------------------
def get_weather(lat: float, lon: float, units: str = DEFAULT_UNITS) -> Dict[str, Any]:
    """
    Query OpenWeather One Call 3.0 for a coordinate and return the normalized result.

    The request excludes only the minutely block. Raises RuntimeError when
    OPENWEATHER_API_KEY is not set; HTTP error statuses are raised by
    ``raise_for_status()``.
    """
    if not OPENWEATHER_API_KEY:
        raise RuntimeError("OPENWEATHER_API_KEY not set")

    # Only "minutely" is dropped; current, hourly, daily (and alerts, if any) remain.
    query = dict(
        lat=lat,
        lon=lon,
        appid=OPENWEATHER_API_KEY,
        units=units,
        exclude="minutely",
    )
    response = requests.get(OWM_ONECALL_URL, params=query, timeout=TIMEOUT_SEC)
    response.raise_for_status()
    return normalize_weather(response.json())


def _utc_datetime(ts: Any) -> Optional[datetime]:
    """Convert a Unix timestamp to an aware UTC datetime; None when missing, zero or invalid."""
    if not ts:
        return None
    try:
        return datetime.fromtimestamp(ts, tz=timezone.utc)
    except (TypeError, ValueError, OverflowError, OSError):
        return None


def _first_weather_entry(node: Dict[str, Any]) -> Dict[str, Any]:
    """Return the first dict in ``node["weather"]``, or {} when the list is absent, empty or malformed."""
    entries = _safedot(node, ["weather"])
    if isinstance(entries, list) and entries and isinstance(entries[0], dict):
        return entries[0]
    return {}


def normalize_weather(raw: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract key fields from the "current" and "daily" sections of a One Call payload.

    Missing, null or wrongly typed sections are treated as empty instead of
    raising, so a partial payload still produces a usable structure.

    Args:
        raw: Decoded JSON body of the weather response.

    Returns:
        A dict with:
          - ``current``: time_utc (ISO string or None), temp_c, humidity_pct,
            wind_mps, uvi, weather_main, weather_desc
          - ``daily``: one dict per forecast day with date_utc (ISO date or
            None), temp_min_c, temp_max_c, humidity_pct, wind_mps, rain_mm,
            uvi, weather_main, weather_desc
          - ``_raw_weather_sample``: ``raw`` itself, for transparency
        Field names carry metric suffixes; the values are whatever the API
        returned for the requested units.
    """
    source = raw if isinstance(raw, dict) else {}

    current = source.get("current")
    if not isinstance(current, dict):
        current = {}
    daily = source.get("daily")  # list of up to 8
    if not isinstance(daily, list):
        daily = []

    current_dt = _utc_datetime(current.get("dt"))
    # An empty "weather" list used to raise IndexError; it now yields None fields.
    current_wx = _first_weather_entry(current)

    out = {
        "current": {
            "time_utc": current_dt.isoformat() if current_dt is not None else None,
            "temp_c": _safedot(current, ["temp"]),
            "humidity_pct": _safedot(current, ["humidity"]),
            "wind_mps": _safedot(current, ["wind_speed"]),
            "uvi": _safedot(current, ["uvi"]),
            "weather_main": _safedot(current_wx, ["main"]),
            "weather_desc": _safedot(current_wx, ["description"]),
        },
        "daily": []
    }

    for d in daily:
        if not isinstance(d, dict):
            continue  # skip malformed entries rather than abort the whole forecast
        day_dt = _utc_datetime(d.get("dt"))
        day_wx = _first_weather_entry(d)
        day = {
            "date_utc": day_dt.date().isoformat() if day_dt is not None else None,
            "temp_min_c": _safedot(d, ["temp", "min"]),
            "temp_max_c": _safedot(d, ["temp", "max"]),
            "humidity_pct": _safedot(d, ["humidity"]),
            "wind_mps": _safedot(d, ["wind_speed"]),
            "rain_mm": _mm_from_pop_and_rain(_safedot(d, ["pop"], 0.0), d.get("rain")),
            "uvi": _safedot(d, ["uvi"]),
            "weather_main": _safedot(day_wx, ["main"]),
            "weather_desc": _safedot(day_wx, ["description"]),
        }
        out["daily"].append(day)
    out["_raw_weather_sample"] = raw
    return out


# -----------------------------
# LLM
# -----------------------------
def _read_field(obj: Any, name: str, default: Any = None) -> Any:
    """Read ``name`` from a dict key or an object attribute, whichever ``obj`` supports."""
    if isinstance(obj, dict):
        return obj.get(name, default)
    return getattr(obj, name, default)


def _response_text(resp: Any) -> str:
    """Return the text of a Responses API result, preferring ``output_text`` and else walking ``output``."""
    try:
        text = resp.output_text
    except Exception:
        text = None
    if isinstance(text, str):
        return text

    # Fallback: text parts are nested inside each output item's "content" list.
    # Items may be SDK objects or plain dicts, so both access styles are supported.
    pieces = []
    output = _read_field(resp, "output")
    for item in output if isinstance(output, list) else []:
        if _read_field(item, "type") == "output_text":
            pieces.append(_read_field(item, "text", "") or "")
        content = _read_field(item, "content")
        for part in content if isinstance(content, list) else []:
            if _read_field(part, "type") == "output_text":
                pieces.append(_read_field(part, "text", "") or "")
    return "".join(pieces)


def llm_weekly_plan(
    crop: str,
    soil: Dict[str, Any],
    weather: Dict[str, Any],
    location_name: Optional[str] = None,
    weeks: int = 4,
) -> str:
    """
    Ask an LLM for a week-by-week farming plan built from crop, soil and forecast data.

    Uses the OpenAI Responses API through the ``openai`` package, which is
    imported lazily so the rest of the notebook works without it.

    Args:
        crop: Crop name inserted into the prompt.
        soil: Normalized soil dict (as produced by normalize_soil); missing
            fields are sent as null.
        weather: Normalized weather dict with "current" and "daily" entries.
        location_name: Optional human-readable place name for the prompt.
        weeks: Number of weeks the plan should cover.

    Returns:
        The model's reply as stripped text. The prompt requests JSON only, but
        the reply is not parsed or validated here.

    Raises:
        RuntimeError: If OPENAI_API_KEY is not set.
    """
    if not OPENAI_API_KEY:
        raise RuntimeError("OPENAI_API_KEY not set")

    # Summarize inputs for the prompt
    soil_bits = {
        "pH": soil.get("ph"),
        "texture_class": soil.get("texture_class"),
        "organic_carbon": soil.get("organic_carbon"),
        "sand_pct": soil.get("sand_pct"),
        "silt_pct": soil.get("silt_pct"),
        "clay_pct": soil.get("clay_pct"),
        "cec": soil.get("cec"),
        "bulk_density": soil.get("bulk_density"),
    }

    cur = weather.get("current", {})
    dailies = weather.get("daily", [])[:max(7, weeks * 7)]  # give the model up to ~weeks*7 days

    # Build a compact weather bullet list to keep prompt small
    wx_lines = []
    for d in dailies:
        wx_lines.append(
            f"{d.get('date_utc')}: {d.get('weather_main')} {d.get('weather_desc')}, "
            f"{d.get('temp_min_c')}–{d.get('temp_max_c')}°C, RH {d.get('humidity_pct')}%, "
            f"rain {d.get('rain_mm')}mm, wind {d.get('wind_mps')} m/s, UVI {d.get('uvi')}"
        )
    wx_block = "\n".join(wx_lines[:14])  # cap to ~2 weeks of lines

    place = f" at {location_name}" if location_name else ""
    system_prompt = (
        "You are an agronomy advisor that writes precise, practical, week-by-week field plans. "
        "Be conservative with fertilizer rates unless guidelines are explicit; recommend ranges. "
        "If rainfall is high, adjust irrigation down. If UVI and wind are high, caution on spraying. "
        "Use metric units. Output must be structured in JSON with keys: overview, weeks[], cautions."
    )

    user_prompt = f"""
Crop: {crop}
Location{place}
Soil summary: {json.dumps(soil_bits, ensure_ascii=False)}
Current weather: {json.dumps(cur, ensure_ascii=False)}
Daily forecast (up to 2 weeks):
{wx_block}

Task: Produce a practical plan for the next {weeks} weeks with sections:
- overview: brief assumptions and goals
- weeks: an array where each element has weekNumber, irrigation, fertilizer, pest_disease, field_operations, notes
- cautions: bullet list of weather/soil related cautions (e.g., high rainfall, heat stress)

Important rules:
- Reference forecast windows when scheduling irrigation (e.g., delay watering if >15mm rain forecast in 48h).
- Keep fertilizer as ranges and split doses where relevant (e.g., basal/topdress).
- Include generic IPM notes tied to humidity/temperature ranges.
- Do NOT invent unsafe chemical usage; prefer integrated management.
- Keep each weekly section under ~120 words.

Return ONLY valid JSON.
"""

    # OpenAI Responses API call
    # Official docs: https://platform.openai.com/docs/api-reference/responses
    import openai  # type: ignore

    openai.api_key = OPENAI_API_KEY

    # Prefer gpt-4o (or gpt-4o-mini for cost); adjust per your account's available models.
    model = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

    resp = openai.responses.create(
        model=model,
        input=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": user_prompt
            }
        ],
        temperature=0.4,
    )

    # Recent SDKs expose output_text directly; otherwise the helper walks the
    # output items. It always returns a string, so strip() is safe.
    return _response_text(resp).strip()


# -----------------------------
# Public entry
# -----------------------------
def _parse_plan_json(text: str) -> Any:
    """Parse the LLM reply as JSON, tolerating a Markdown code fence; wrap unparsable text instead of raising."""
    candidate = (text or "").strip()
    if candidate.startswith("```"):
        # Drop the opening fence line (it may carry a language tag such as
        # ```json) and a trailing closing fence, if present.
        newline = candidate.find("\n")
        candidate = candidate[newline + 1:] if newline != -1 else candidate[3:]
        candidate = candidate.rstrip()
        if candidate.endswith("```"):
            candidate = candidate[:-3]
    try:
        return json.loads(candidate)
    except Exception:
        # Best effort by design: keep the original reply for inspection.
        return {"_llm_text": text}


def generate_weekly_plan(
    lat: float,
    lon: float,
    crop: str,
    location_name: Optional[str] = None,
    weeks: int = 4,
    units: str = DEFAULT_UNITS,
) -> Dict[str, Any]:
    """
    Fetch soil and weather for a location, ask the LLM for a plan and bundle everything.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.
        crop: Crop name passed to the LLM.
        location_name: Optional place name for the prompt and the echo of inputs.
        weeks: Number of weeks the plan should cover.
        units: Units system forwarded to get_weather().

    Returns:
        A dict with keys ``inputs``, ``soil``, ``weather``, ``plan`` and
        ``generated_at``. ``plan`` is the parsed JSON reply, or
        ``{"_llm_text": <reply>}`` when the reply is not valid JSON.
        ``generated_at`` is the current UTC time as an ISO string ending in "Z".

    Raises:
        Whatever get_weather() or llm_weekly_plan() raise (missing API keys,
        HTTP errors). Soil lookup failures do not raise; they appear as an
        ``_error`` entry inside ``soil``.
    """
    soil = get_soil(lat, lon)
    weather = get_weather(lat, lon, units=units)
    plan_json_str = llm_weekly_plan(crop=crop, soil=soil, weather=weather, location_name=location_name, weeks=weeks)

    # Models often wrap JSON in a ```json fence despite instructions; the
    # helper strips it before parsing and wraps anything still unparsable.
    plan = _parse_plan_json(plan_json_str)

    # datetime.utcnow() is deprecated; take an aware UTC time and drop tzinfo
    # so the string keeps the original "...Z" shape instead of "+00:00Z".
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return {
        "inputs": {
            "lat": lat,
            "lon": lon,
            "crop": crop,
            "location_name": location_name,
            "units": units,
        },
        "soil": soil,
        "weather": weather,
        "plan": plan,
        "generated_at": generated_at,
    }


# Manual smoke test: builds a 4-week plan for one hard-coded location and
# prints only the "plan" part of the result as pretty JSON.
# NOTE(review): inside a notebook kernel __name__ is normally "__main__", so
# this block presumably runs on every execution of the cell and performs live
# soil, weather and LLM requests - confirm that is intended.
if __name__ == "__main__":
    # Quick manual test:
    # Example: Pune, India (approx)
    lat, lon = 18.5204, 73.8567
    crop = "Rice"
    # Requires OPENWEATHER_API_KEY and OPENAI_API_KEY; otherwise RuntimeError is raised.
    result = generate_weekly_plan(lat=lat, lon=lon, crop=crop, location_name="Pune, IN", weeks=4)
    # ensure_ascii=False keeps non-ASCII characters (e.g. "°") readable in the output.
    print(json.dumps(result["plan"], indent=2, ensure_ascii=False))
