Need to set up an HTTPS endpoint

In [None]:
!fuser -k 8000/tcp || true
!fuser -k 8001/tcp || true
!pip install httpx
!pip install requests




In [None]:
# Install required deps (run once per Colab runtime)
!pip -q install fastapi uvicorn pyngrok requests nest_asyncio httpx

import os, threading, time, requests, nest_asyncio
from fastapi import FastAPI, Request, Query
from fastapi.responses import PlainTextResponse
import uvicorn
from pyngrok import ngrok


# ---- CONFIG ----
# Every credential is read from the process environment; os.getenv returns
# None for a variable that is not set, so a missing secret only surfaces
# later, when the corresponding API call is made.
GEMINI_API_KEY     = os.getenv("GEMINI_API_KEY")      # sent as the `key` query parameter to the Gemini API
OWM_API_KEY        = os.getenv("OWM_API_KEY")         # sent as `appid` to OpenWeatherMap
MANDI_API_KEY      = os.getenv("MANDI_API_KEY")       # sent as `api-key` to api.data.gov.in
RESOURCE_ID        = os.getenv("RESOURCE_ID")         # path segment of the data.gov.in resource URL
WHATSAPP_TOKEN     = os.getenv("WHATSAPP_TOKEN")      # Bearer token for the Graph API send-message call
PHONE_NUMBER_ID    = os.getenv("PHONE_NUMBER_ID")     # path segment of the Graph API messages URL
NGROK_AUTH_TOKEN   = os.getenv("NGROK_AUTH_TOKEN")
VERIFY_TOKEN       = os.getenv("VERIFY_TOKEN")        # compared against hub.verify_token in GET /webhook
PORT = int(os.getenv("PORT", "8000"))                                              # use 8000 unless busy



# Single FastAPI application; the route decorators in later cells attach to it.
app = FastAPI()

In [None]:
from fastapi import Query
from fastapi.responses import PlainTextResponse
import datetime
import requests

def _check_webhook_subscription(label, hub_mode, hub_challenge, hub_verify_token):
    """Validate a webhook verification request and build the response.

    Shared by both GET routes so the two paths cannot drift apart.

    Args:
        label: Prefix used in the log line (identifies which route was hit).
        hub_mode: Value of the ``hub.mode`` query parameter.
        hub_challenge: Value of the ``hub.challenge`` query parameter.
        hub_verify_token: Value of the ``hub.verify_token`` query parameter.

    Returns:
        A 200 plain-text response echoing ``hub_challenge`` when the mode is
        ``subscribe``, the token matches ``VERIFY_TOKEN`` and a challenge was
        supplied; otherwise a 403 plain-text response with body ``invalid``.
    """
    import hmac  # local import keeps this cell self-contained

    # An unset or empty VERIFY_TOKEN must never validate: otherwise a request
    # that carries no token at all would be accepted.
    token_ok = bool(VERIFY_TOKEN) and hmac.compare_digest(
        hub_verify_token.encode("utf-8"), VERIFY_TOKEN.encode("utf-8")
    )
    # The token is a shared secret, so only the match result is logged.
    print(label, {"hub_mode": hub_mode, "hub_challenge": hub_challenge, "token_ok": token_ok})
    if hub_mode == "subscribe" and token_ok and hub_challenge:
        return PlainTextResponse(hub_challenge, status_code=200)
    return PlainTextResponse("invalid", status_code=403)


@app.get("/webhook")
def verify(
    hub_mode: str = Query(default="", alias="hub.mode"),
    hub_challenge: str = Query(default="", alias="hub.challenge"),
    hub_verify_token: str = Query(default="", alias="hub.verify_token"),
):
    """Handle the webhook verification handshake on ``/webhook``."""
    return _check_webhook_subscription("VERIFY HIT:", hub_mode, hub_challenge, hub_verify_token)


@app.get("/webhook/")
def verify_slash(
    hub_mode: str = Query(default="", alias="hub.mode"),
    hub_challenge: str = Query(default="", alias="hub.challenge"),
    hub_verify_token: str = Query(default="", alias="hub.verify_token"),
):
    """Handle the same handshake when the callback URL has a trailing slash."""
    return _check_webhook_subscription("VERIFY SLASH HIT:", hub_mode, hub_challenge, hub_verify_token)


def get_lat_lon_from_place(place_query):
    """Geocode a place name with the OpenWeatherMap direct geocoding API.

    Args:
        place_query: Free-text place name sent as the ``q`` parameter.

    Returns:
        ``(lat, lon)`` of the first result, or ``(None, None)`` when the query
        is empty, the request fails, the API answers with a non-200 status, or
        the response has no usable first result.
    """
    if not place_query or not str(place_query).strip():
        print("Geocoding error: empty place name.")
        return None, None

    # HTTPS, like the other OpenWeatherMap calls in this file: the API key
    # travels in the query string and must not be sent in clear text.
    url = "https://api.openweathermap.org/geo/1.0/direct"
    params = {"q": place_query, "limit": 1, "appid": OWM_API_KEY}
    try:
        resp = requests.get(url, params=params, timeout=10)
        if resp.status_code != 200:
            print(f"Geocoding error: HTTP {resp.status_code}")
            return None, None
        results = resp.json()
    except Exception as e:
        print("OWM geocoding error:", e)
        return None, None

    # Only a non-empty list whose first item is a dict with both keys counts
    # as a hit; anything else is treated as "not found".
    if isinstance(results, list) and results and isinstance(results[0], dict):
        first = results[0]
        if "lat" in first and "lon" in first:
            return first["lat"], first["lon"]
    print("Geocoding error: No results or missing lat/lon.")
    return None, None

def get_16day_weather_aggregated(lat, lon, owm_api_key, days=5):
    """Summarise the OpenWeatherMap forecast feed into per-day figures.

    Forecast entries are grouped by their UTC calendar date. For each of the
    first ``days`` dates (in ascending order) the result carries the mean
    temperature and the summed ``rain["3h"]`` values.

    Returns:
        A list of ``{"date", "avg_temp", "total_rain"}`` dicts, or ``[]`` when
        the request fails or the API answers with a non-200 status.
    """
    endpoint = "https://api.openweathermap.org/data/2.5/forecast"
    query = {"lat": lat, "lon": lon, "appid": owm_api_key, "units": "metric"}
    try:
        response = requests.get(endpoint, params=query, timeout=10)
        if response.status_code != 200:
            print(f"Weather API error: HTTP {response.status_code}")
            return []
        payload = response.json()
    except Exception as exc:
        print("Weather API EXCEPTION:", exc)
        return []

    # Accumulate running sums per UTC date; malformed entries are skipped.
    per_day = {}
    for slot in payload.get("list", []):
        try:
            day_key = datetime.datetime.fromtimestamp(slot["dt"], datetime.timezone.utc).date()
            rain_mm = slot.get("rain", {}).get("3h", 0)
            temp_c = slot["main"]["temp"]
        except Exception as exc:
            print("Parse error:", exc)
            continue
        bucket = per_day.setdefault(day_key, {"temp_sum": 0, "rain_sum": 0, "count": 0})
        bucket["temp_sum"] += temp_c
        bucket["rain_sum"] += rain_mm
        bucket["count"] += 1

    summary = []
    for position, day_key in enumerate(sorted(per_day)):
        if position >= days:
            break
        bucket = per_day[day_key]
        if bucket["count"]:
            mean_temp = bucket["temp_sum"] / bucket["count"]
        else:
            mean_temp = 0
        summary.append({"date": str(day_key), "avg_temp": mean_temp, "total_rain": bucket["rain_sum"]})
    return summary

def get_weather_openweathermap(lat, lon):
    """Return a short text summary of the current weather at ``lat``/``lon``.

    Missing individual fields are rendered as ``N/A``. When the response lacks
    the ``main`` or ``weather`` sections a "not found" sentence is returned,
    and any exception yields a generic error sentence instead of propagating.
    """
    endpoint = "https://api.openweathermap.org/data/2.5/weather"
    query = {"lat": lat, "lon": lon, "appid": OWM_API_KEY, "units": "metric"}
    try:
        payload = requests.get(endpoint, params=query, timeout=10).json()
        if not ("main" in payload and "weather" in payload):
            print("Weather fetch: No main/weather in response.")
            return "Weather data not found for your location."

        readings = payload["main"]
        temp = readings.get("temp", "N/A")
        cond = payload["weather"][0].get("description", "N/A").capitalize()
        humid = readings.get("humidity", "N/A")
        wind = payload.get("wind", {}).get("speed", "N/A")
        report_lines = [
            "Current weather:",
            f"Temperature: {temp}¬∞C",
            f"Condition: {cond}",
            f"Humidity: {humid}%",
            f"Wind: {wind} m/s",
        ]
        return "\n".join(report_lines)
    except Exception as exc:
        print("OWM weather fetch error:", exc)
        return "Error retrieving weather data."


import requests

# ===== MANDI PRICE HELPER =====
def get_mandi_price(commodity, market="", api_key=MANDI_API_KEY, resource_id=RESOURCE_ID, days=1):
    """Fetch mandi price records for a commodity, optionally filtered by market.

    Args:
        commodity: Commodity name sent as the ``filters[commodity]`` parameter.
        market: Market name matched case-insensitively after trimming
            whitespace. An empty value disables the filter, so a commodity-only
            lookup returns records from any market.
        api_key: data.gov.in API key.
        resource_id: Resource identifier used in the request URL.
        days: Maximum number of records to return.

    Returns:
        Up to ``days`` matching records, or ``[]`` on any error.
    """
    url = f"https://api.data.gov.in/resource/{resource_id}"
    params = {
        "api-key": api_key,
        "format": "json",
        "limit": 100,  # fetch up to 100 entries for this commodity
        "filters[commodity]": commodity
    }
    try:
        resp = requests.get(url, params=params, timeout=10)
        data = resp.json()
        records = data.get("records", [])
        wanted = (market or "").strip().lower()
        if wanted:
            # `or ""` tolerates records whose market field is present but null.
            matched = [
                rec for rec in records
                if (rec.get("market") or "").strip().lower() == wanted
            ]
        else:
            matched = records
        return matched[:days]  # return up to 'days' records
    except Exception as e:
        print("Error fetching mandi price:", e)
        return []




In [None]:

# ---- GEMINI FUNCTIONS ----
import requests
import datetime

def build_gemini_prompt(place, npk, soil_color, daily_weather):
    """Compose the crop-recommendation prompt text for Gemini.

    ``daily_weather`` is a list of dicts carrying ``total_rain`` and
    ``avg_temp``. When it is empty or falsy the prompt uses 0 mm of rain and a
    25 degree average. The season label is derived from the current month.
    """
    forecast = daily_weather or []
    rain_list = [round(day['total_rain'], 1) for day in forecast]
    temp_list = [round(day['avg_temp'], 1) for day in forecast]
    if forecast:
        total_rain = sum(day['total_rain'] for day in forecast)
        avg_temp = sum(day['avg_temp'] for day in forecast) / len(forecast)
    else:
        total_rain = 0
        avg_temp = 25

    # June-September -> Kharif, October-February -> Rabi, otherwise Zaid.
    month = datetime.datetime.now().month
    if 6 <= month <= 9:
        season = "Kharif (Monsoon)"
    elif month >= 10 or month <= 2:
        season = "Rabi (Winter)"
    else:
        season = "Zaid (Summer)"

    return f"""
You are an expert Indian agricultural advisor. Based on these details, provide personalized crop recommendations in this EXACT format:

üåæ CROP RECOMMENDATIONS for {place}

üìä N: {npk[0]}mg/kg, P: {npk[1]}mg/kg, K: {npk[2]}mg/kg
üü´ Soil Color: {soil_color}
üå§Ô∏è Next 5 days: {total_rain:.1f}mm rain, {avg_temp:.1f}¬∞C avg
üìÖ Season: {season}

Recommend top 2-3 suitable crops for this season with:

1. [Crop Name] üå±
   Reason: [Why it suits the NPK, weather, and season]
   Plant: [Best planting window - month range]
   Harvest: [Expected harvest time - month range]

2. [Crop Name] üåæ
   Reason: [Why it suits the conditions]
   Plant: [Planting window]
   Harvest: [Harvest time]

üíß [Water/irrigation advice based on expected rainfall of {total_rain:.1f}mm]

Weather details for your analysis:
Daily rain (mm): {rain_list}
Daily temps (¬∞C): {temp_list}

Focus on crops suitable for {season} season in India. Give practical, actionable advice for smallholder farmers.
"""

def get_gemini_crop_recommendation(prompt, gemini_api_key=GEMINI_API_KEY):
    """Send ``prompt`` to the Gemini generateContent endpoint and return the reply text.

    Never raises: HTTP failures, replies without candidates and exceptions are
    all turned into a human-readable error string that is returned instead.
    """
    endpoint = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
    safety_rules = [
        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
    ]
    request_body = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"maxOutputTokens": 800, "temperature": 0.7},
        "safetySettings": safety_rules,
    }
    try:
        response = requests.post(
            endpoint,
            headers={"Content-Type": "application/json"},
            params={"key": gemini_api_key},
            json=request_body,
            timeout=30,
        )
        print("Gemini API response:", response.status_code)
        if response.status_code != 200:
            print("Gemini error body:", response.text)
            return f"Gemini API error: HTTP {response.status_code}"

        reply = response.json()
        if not ("candidates" in reply and reply["candidates"]):
            print("Gemini API error (structure):", reply)
            return f"Gemini API error: {reply.get('error', {}).get('message', 'No candidates in reply')}"

        # Only the first part of the first candidate is used.
        first_part = reply["candidates"][0]["content"]["parts"][0]
        return first_part.get("text", "").strip()
    except Exception as exc:
        print("Exception in Gemini fetch:", exc)
        return f"Error calling Gemini: {exc}"


In [None]:
# ===== MANDI PRICE HELPER =====
def get_mandi_price(commodity, market="", api_key=MANDI_API_KEY, resource_id=RESOURCE_ID, days=1):
    """Look up mandi price records for ``commodity``.

    When ``market`` is non-empty, only records whose market name equals it
    (ignoring case and surrounding whitespace) are kept. At most ``days``
    records are returned; any exception is logged and yields ``[]``.
    """
    endpoint = f"https://api.data.gov.in/resource/{resource_id}"
    query = {
        "api-key": api_key,
        "format": "json",
        "limit": 100,
        "filters[commodity]": commodity,
    }

    def _in_requested_market(row):
        # Case-insensitive, whitespace-trimmed comparison of market names.
        return row.get("market", "").strip().lower() == market.strip().lower()

    try:
        payload = requests.get(endpoint, params=query, timeout=10).json()
        rows = payload.get("records", [])
        selected = list(filter(_in_requested_market, rows)) if market else rows
        return selected[:days]
    except Exception as exc:
        print("Error fetching mandi price:", exc)
        return []

# ===== USER SESSION TRACKING =====
# In-memory conversation state, keyed by the sender's phone number.
user_sessions = {}

def get_user_session(phone_number):
    """Return the session dict for ``phone_number``, creating it on first use.

    A new session starts in English at the ``language_select`` step with an
    empty ``awaiting`` field. The same dict object is returned on later calls,
    so callers mutate the stored session in place.
    """
    try:
        return user_sessions[phone_number]
    except KeyError:
        fresh = {"language": "english", "step": "language_select", "awaiting": ""}
        user_sessions[phone_number] = fresh
        return fresh

def translate_reply(text, target_language):
    """Translate ``text`` into ``target_language`` with Gemini, in simple farmer-friendly words.

    Args:
        text: The English reply to translate.
        target_language: ``"english"``, ``"hindi"`` or ``"marathi"``.

    Returns:
        The translated text. The original ``text`` is returned unchanged when
        the target is English, when the language is not supported, or when the
        translation request fails for any reason, so the user always receives
        some reply.
    """
    if target_language == "english":
        return text

    language_map = {
        "hindi": "Hindi",
        "marathi": "Marathi"
    }

    # An unknown language used to raise KeyError here; fall back to the
    # untranslated text like every other failure path in this function.
    language_name = language_map.get(target_language)
    if language_name is None:
        print("Translation skipped: unsupported language", repr(target_language))
        return text

    prompt = f"Translate the following text to {language_name} using simple words that a small farmer can easily understand. Avoid technical jargon. Keep it conversational and friendly:\n\n{text}"

    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
    headers = {"Content-Type": "application/json"}
    body = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": 800}}
    params = {"key": GEMINI_API_KEY}

    try:
        resp = requests.post(url, headers=headers, params=params, json=body, timeout=15)
        if resp.status_code == 200:
            result = resp.json()
            if "candidates" in result and result["candidates"]:
                return result["candidates"][0]["content"]["parts"][0]["text"].strip()
        else:
            print("Translation error: HTTP", resp.status_code)
    except Exception as e:
        print("Translation error:", e)

    return text

# ===== ENHANCED WHATSAPP HANDLER =====
@app.post("/webhook")
async def receive(request: Request):
    """Handle an incoming WhatsApp webhook POST and drive the menu conversation.

    Reads the JSON body, takes the first message of the first entry/change and,
    if it is a text message, advances the sender's session state machine
    (language_select -> main_menu -> one of the await_*_input steps), builds a
    reply and posts it to the Graph API messages endpoint.

    Any exception raised while handling the message is printed and swallowed;
    the function always returns ``{"status": "ok"}``.
    """
    body = await request.json()
    try:
        # Only entry[0].changes[0].value is inspected; additional entries or
        # changes in the same payload are not processed.
        entry = body.get("entry", [{}])[0].get("changes", [{}])[0].get("value", {})
        msgs = entry.get("messages", [])

        if msgs and isinstance(msgs, list):
            # Only the first message is handled, and only when it is of type "text".
            msg = msgs[0]
            if msg.get("type") == "text":
                from_ = msg.get("from")
                text = msg.get("text", {}).get("body", "").strip()
                print(f"Got message: '{text}' from {from_}")

                session = get_user_session(from_)
                reply = ""

                # Check for "go back" command - enhanced to handle both main menu and language selection
                if text == "0":
                    if session["step"] == "language_select":
                        # Already at language selection, show same menu
                        reply = (
                            "üåê Select your language:\n"
                            "1. English\n"
                            "2. ‡§π‡§ø‡§Ç‡§¶‡•Ä\n"
                            "3. ‡§Æ‡§∞‡§æ‡§†‡•Ä\n\n"
                            "Reply with 1, 2, or 3"
                        )
                    elif session["step"] == "main_menu":
                        # From main menu, go back to language selection
                        session["step"] = "language_select"
                        reply = (
                            "üåê Select your language:\n"
                            "1. English\n"
                            "2. ‡§π‡§ø‡§Ç‡§¶‡•Ä\n"
                            "3. ‡§Æ‡§∞‡§æ‡§†‡•Ä\n\n"
                            "Reply with 1, 2, or 3"
                        )
                    else:
                        # From any other step, go to main menu
                        session["step"] = "main_menu"
                        welcome_msg = (
                            "üåæ Welcome to AgriBot! How can I help you today?\n\n"
                            "1Ô∏è‚É£ Crop suggestion\n"
                            "2Ô∏è‚É£ Ways to save on farming costs\n"
                            "3Ô∏è‚É£ Today's mandi price\n"
                            "4Ô∏è‚É£ Pest/disease alert\n\n"
                            "Choose: 1, 2, 3, or 4. Type 0 to go back."
                        )
                        reply = translate_reply(welcome_msg, session["language"])

                # ===== LANGUAGE SELECTION =====
                elif session["step"] == "language_select":
                    if text in ["1", "2", "3"]:
                        language_options = {"1": "english", "2": "hindi", "3": "marathi"}
                        session["language"] = language_options.get(text, "english")
                        session["step"] = "main_menu"

                        welcome_msg = (
                            "üåæ Welcome to AgriBot! How can I help you today?\n\n"
                            "1Ô∏è‚É£ Crop suggestion\n"
                            "2Ô∏è‚É£ Ways to save on farming costs\n"
                            "3Ô∏è‚É£ Today's mandi price\n"
                            "4Ô∏è‚É£ Pest/disease alert\n\n"
                            "Choose: 1, 2, 3, or 4. Type 0 to go back to language menu."
                        )
                        reply = translate_reply(welcome_msg, session["language"])
                    else:
                        # Anything other than 1/2/3 re-shows the language menu (untranslated).
                        reply = (
                            "üåê Select your language:\n"
                            "1. English\n"
                            "2. ‡§π‡§ø‡§Ç‡§¶‡•Ä\n"
                            "3. ‡§Æ‡§∞‡§æ‡§†‡•Ä\n\n"
                            "Reply with 1, 2, or 3"
                        )

                # ===== MAIN MENU =====
                elif session["step"] == "main_menu":
                    if text == "1":
                        session["step"] = "await_crop_input"
                        reply = (
                            "üå± CROP SUGGESTION\n\n"
                            "Send like this:\n"
                            "crop <village> NPK <N> <P> <K> soil <color> farm <acres>\n\n"
                            "Example:\n"
                            "crop Nashik NPK 80 40 250 soil black farm 2\n\n"
                            "Type 0 to go back to main menu"
                        )
                    elif text == "2":
                        session["step"] = "await_cost_input"
                        reply = (
                            "üí∞ SAVE ON FARMING COSTS\n\n"
                            "Send like this:\n"
                            "cost <crop> <village> farm <acres> budget <amount>\n\n"
                            "Example:\n"
                            "cost cotton Pune farm 2 budget 15000\n\n"
                            "Type 0 to go back to main menu"
                        )
                    elif text == "3":
                        session["step"] = "await_market_input"
                        reply = (
                            "üìà TODAY'S MANDI PRICE\n\n"
                            "Send like this:\n"
                            "price <crop> <mandi_name>\n\n"
                            "Example:\n"
                            "price Cabbage Gurgaon\n\n"
                            "Type 0 to go back to main menu"
                        )
                    elif text == "4":
                        session["step"] = "await_pest_input"
                        reply = (
                            "üö® PEST/DISEASE ALERT\n\n"
                            "Send like this:\n"
                            "pest <crop> <village>\n\n"
                            "Example:\n"
                            "pest cotton Nashik\n\n"
                            "Type 0 to go back to main menu"
                        )
                    else:
                        reply = (
                            "Choose a number (1-4):\n"
                            "1Ô∏è‚É£ Crop suggestion\n"
                            "2Ô∏è‚É£ Save on costs\n"
                            "3Ô∏è‚É£ Mandi price\n"
                            "4Ô∏è‚É£ Pest alert\n\n"
                            "Type 0 to go back to language menu"
                        )

                    # Every main-menu reply goes through translation, including the usage examples.
                    reply = translate_reply(reply, session["language"])

                # ===== CROP RECOMMENDATION =====
                elif session["step"] == "await_crop_input":
                    if text.lower().startswith("crop"):
                        parts = text.split()
                        try:
                            # The village is the single whitespace-delimited token after "crop".
                            place = parts[1]
                            npk_idx = [i for i, p in enumerate(parts) if p.upper() == "NPK"][0]
                            npk = [int(parts[npk_idx+1]), int(parts[npk_idx+2]), int(parts[npk_idx+3])]
                            soil_idx = [i for i, p in enumerate(parts) if p.lower() == "soil"][0]
                            soil_color = parts[soil_idx+1]
                            farm_idx = [i for i, p in enumerate(parts) if p.lower() == "farm"][0]
                            farm_size = parts[farm_idx+1]

                            lat, lon = get_lat_lon_from_place(place)
                            # NOTE(review): truthiness test also rejects a coordinate of exactly 0 — confirm intended.
                            if lat and lon:
                                daily_weather = get_16day_weather_aggregated(lat, lon, OWM_API_KEY, days=5)
                                if daily_weather:
                                    prompt = f"""
You are a friendly farm advisor for small Indian farmers. Use simple, easy language.
Location: {place}, Farm size: {farm_size} acres, Soil: {soil_color}, NPK: {npk}
Weather next 5 days: {daily_weather}

Suggest 2-3 best crops for this farmer. Explain:
- Which crops to grow and why
- When to plant (simple months)
- Expected income
- Simple farming tips

At the end, remind the farmer they can send commands in English like:
- "crop <village> NPK <numbers> soil <color> farm <acres>" for crop suggestions
- Type "0" to go back to main menu

Use everyday language, not technical words.
"""
                                    reply = get_gemini_crop_recommendation(prompt)
                                    reply = translate_reply(reply, session["language"])
                                else:
                                    reply = f"Could not get weather for {place}"
                            else:
                                reply = f"Could not find {place}"

                            # Reached for any well-formed request, whether or not the lookups succeeded.
                            session["step"] = "main_menu"
                        except Exception as e:
                            # Parsing failed (missing keyword, non-integer NPK, ...); the step is not advanced.
                            reply = "Wrong format. Try:\ncrop <village> NPK <N> <P> <K> soil <color> farm <acres>"
                    else:
                        reply = "Start with 'crop' then your details"

                # ===== COST OPTIMIZER =====
                elif session["step"] == "await_cost_input":
                    if text.lower().startswith("cost"):
                        parts = text.split()
                        try:
                            crop = parts[1]
                            place = parts[2]
                            farm_idx = [i for i, p in enumerate(parts) if p.lower() == "farm"][0]
                            farm_size = parts[farm_idx+1]
                            budget_idx = [i for i, p in enumerate(parts) if p.lower() == "budget"][0]
                            budget = int(parts[budget_idx+1])

                            prompt = f"""
You are a helpful farm cost advisor for small Indian farmers. Use simple language.

Help this farmer save money:
- Crop: {crop}
- Location: {place}
- Farm: {farm_size} acres
- Budget: ‚Çπ{budget}

Give practical tips to:
1. Save on seeds and fertilizers
2. Reduce irrigation costs
3. Government schemes they can use
4. Expected profit

At the end, remind the farmer they can send commands in English like:
- "cost <crop> <village> farm <acres> budget <amount>" for cost advice
- Type "0" to go back to main menu

Use simple words, no technical terms.
"""
                            reply = get_gemini_crop_recommendation(prompt)
                            reply = translate_reply(reply, session["language"])
                            session["step"] = "main_menu"
                        except Exception as e:
                            reply = "Wrong format. Try:\ncost <crop> <village> farm <acres> budget <amount>"
                    else:
                        reply = "Start with 'cost' then your details"

                # ===== MARKET INTELLIGENCE (UPDATED WITH REAL MANDI API) =====
                elif session["step"] == "await_market_input":
                    if text.lower().startswith("price"):
                        parts = text.split()
                        try:
                            # Accept both "price <crop> <mandi>" and "price <crop>"
                            if len(parts) >= 3:
                                commodity = parts[1]
                                market_name = parts[2]
                            elif len(parts) == 2:
                                commodity = parts[1]
                                market_name = ""  # No market specified, use any available
                            else:
                                raise Exception("Not enough parts")

                            # Fetch mandi prices
                            mandi_results = get_mandi_price(commodity, market_name, MANDI_API_KEY, RESOURCE_ID, days=1)

                            if mandi_results:
                                rec = mandi_results[0]
                                price_info = (
                                    f"Today's price for {rec['commodity']} at {rec['market']} mandi ({rec['arrival_date']}):\n"
                                    f"Low: ‚Çπ{rec['min_price']} | High: ‚Çπ{rec['max_price']} | Average: ‚Çπ{rec['modal_price']}"
                                )
                                prompt = f"""
You are a helpful market advisor for small farmers. Use simple language.

Based on today's real mandi prices:
{price_info}

Tell the farmer:
- Is this a good price to sell today?
- Should they wait or sell now?
- Simple tips to get better rates
- Best time to sell this week

At the end, remind the farmer they can send commands in English like:
- "price <crop> <mandi_name>" for market prices
- Type "0" to go back to main menu

Use everyday language, be practical and helpful.
"""
                                reply = get_gemini_crop_recommendation(prompt)
                                reply = translate_reply(reply, session["language"])
                            else:
                                # Hard-coded suggestion list; it is not derived from the API response.
                                available_commodities = ["Cabbage", "Banana", "Cauliflower", "Bottle gourd", "Bhindi(Ladies Finger)"]
                                reply = f"No price found for {commodity} at {market_name if market_name else 'any market'} today.\n\nTry these available crops: {', '.join(available_commodities)}"

                            session["step"] = "main_menu"
                        except Exception as e:
                            print("Price block error:", e)
                            reply = "Wrong format! Try:\nprice <crop> <mandi_name>\nOr: price <crop>"
                    else:
                        reply = "Start with 'price' then crop and mandi name"

                # ===== PEST/DISEASE WARNING =====
                elif session["step"] == "await_pest_input":
                    if text.lower().startswith("pest"):
                        parts = text.split()
                        try:
                            crop = parts[1]
                            place = parts[2]

                            lat, lon = get_lat_lon_from_place(place)
                            if lat and lon:
                                weather = get_weather_openweathermap(lat, lon)
                                prompt = f"""
You are a helpful farm doctor for small Indian farmers. Use simple language.

Check for pests and diseases:
- Crop: {crop}
- Location: {place}
- Weather: {weather}

Tell the farmer:
- What pests to watch for now
- Simple signs to look for
- Easy treatments (with costs)
- When to spray
- Home remedies if possible

At the end, remind the farmer they can send commands in English like:
- "pest <crop> <village>" for pest alerts
- Type "0" to go back to main menu

Use simple words, be practical and helpful.
"""
                                reply = get_gemini_crop_recommendation(prompt)
                                reply = translate_reply(reply, session["language"])
                            else:
                                reply = f"Could not find {place}"

                            session["step"] = "main_menu"
                        except Exception as e:
                            reply = "Wrong format. Try:\npest <crop> <village>"
                    else:
                        reply = "Start with 'pest' then crop and village"

                # ===== DEFAULT: LANGUAGE SELECTION =====
                else:
                    # Unknown step value: reset the conversation to the language menu.
                    session["step"] = "language_select"
                    reply = (
                        "üåê Select your language:\n"
                        "1. English\n"
                        "2. ‡§π‡§ø‡§Ç‡§¶‡•Ä\n"
                        "3. ‡§Æ‡§∞‡§æ‡§†‡•Ä"
                    )

                # ===== SEND REPLY =====
                if reply and from_:
                    url = f"https://graph.facebook.com/v20.0/{PHONE_NUMBER_ID}/messages"
                    headers = {"Authorization": f"Bearer {WHATSAPP_TOKEN}"}
                    payload = {"messaging_product": "whatsapp", "to": from_, "text": {"body": reply}}

                    # A failed send is only logged; the response status is printed but not acted on.
                    try:
                        response = requests.post(url, headers=headers, json=payload, timeout=10)
                        print("WhatsApp API response:", response.status_code)
                    except Exception as e:
                        print("WhatsApp send error:", e)

    except Exception as e:
        print("Handler error:", e)

    # Returned for every request that reaches this point, including ones whose handling failed.
    return {"status": "ok"}


In [None]:
# ---- Start server and ngrok tunnel on the chosen PORT ----
# Allow uvicorn to start an event loop inside the notebook's already-running loop.
nest_asyncio.apply()
# The config cell defines NGROK_AUTH_TOKEN; the previous spelling
# (NGROK_AUTHTOKEN) is not defined anywhere and raised NameError.
ngrok.set_auth_token(NGROK_AUTH_TOKEN)
tunnel = ngrok.connect(PORT, "http")
print("Callback URL (paste into WhatsApp Webhook):", tunnel.public_url + "/webhook")

def run():
    """Serve the FastAPI app with uvicorn on PORT, listening on all interfaces."""
    uvicorn.run(app, host="0.0.0.0", port=PORT)

# Daemon thread so the server does not keep the process alive after the notebook exits.
threading.Thread(target=run, daemon=True).start()
# Short pause so the startup log lines appear under this cell.
time.sleep(2)

INFO:     Started server process [24085]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


Callback URL (paste into WhatsApp Webhook): https://pestersome-hyperprophetic-brennan.ngrok-free.dev/webhook


**Test Block**

In [None]:
# Recommendation system (moderate configuration)
# - RandomizedSearchCV: n_iter=12, cv=3
# - RandomForest: ~400 trees, capped depth, min_samples_leaf, max_samples subsampling
# - Calibration: prefit on a small calibration split (cv='prefit')

import io
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.calibration import CalibratedClassifierCV
from scipy.stats import randint
import joblib

# =========================
# 1) Load data (upload in Colab)
# =========================
# Prompt for a CSV through Colab's upload widget and parse the first uploaded file.
# Any failure (not running in Colab, nothing uploaded, unreadable CSV) is
# re-raised as a RuntimeError with the original exception chained.
try:
    from google.colab import files
    print("Upload the dataset CSV now...")
    uploaded = files.upload()
    csv_name = list(uploaded.keys())[0]
    df = pd.read_csv(io.BytesIO(uploaded[csv_name]))
except Exception as e:
    raise RuntimeError("Upload a CSV containing the listed columns and a 'label' target.") from e

print("Loaded shape:", df.shape)
print(df.head())

# =========================
# 2) Standardize column names
# =========================
# Normalise the pH column spelling and the WS2M_RA name; only keys that are
# actually present in the frame are passed to rename.
rename_map = {'Ph': 'ph', 'PH': 'ph', 'pH': 'ph', 'WS2M_RA': 'WS2M_RANGE'}
df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})

# The target column is mandatory; fail early with an actionable message.
if 'label' not in df.columns:
    raise ValueError("Target column 'label' not found. Add/rename the crop label column to 'label'.")

# =========================
# 3) Define expected columns
# =========================
# Each list keeps only the expected columns that exist in the uploaded frame,
# so a dataset missing some of them still runs with a reduced feature set.
nutrient_cols = [c for c in ['N','P','K','Zn','S','ph'] if c in df.columns]
soil_cat_col = 'Soilcolor' if 'Soilcolor' in df.columns else None  # None when the column is absent

# Four-column groups, one per suffix (-W, -Sp, -Su, -Au); aggregated row-wise later.
qv_cols   = [c for c in ['QV2M-W','QV2M-Sp','QV2M-Su','QV2M-Au'] if c in df.columns]
tmax_cols = [c for c in ['T2M_MAX-W','T2M_MAX-Sp','T2M_MAX-Su','T2M_MAX-Au'] if c in df.columns]
tmin_cols = [c for c in ['T2M_MIN-W','T2M_MIN-Sp','T2M_MIN-Su','T2M_MIN-Au'] if c in df.columns]
pr_cols   = [c for c in ['PRECTOTCORR-W','PRECTOTCORR-Sp','PRECTOTCORR-Su','PRECTOTCORR-Au'] if c in df.columns]

# Remaining numeric columns that are used as-is.
other_meteo = [c for c in ['WD10M','GWETTOP','CLOUD_AMT','WS2M_RANGE','PS'] if c in df.columns]

# =========================
# 4) Feature engineering
# =========================
def add_seasonal_aggregates(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``frame`` with row-wise summaries of each seasonal group.

    For every non-empty module-level column group (``qv_cols``, ``tmax_cols``,
    ``tmin_cols``, ``pr_cols``) the copy gains ``<prefix>_mean`` and
    ``<prefix>_amp`` (row max minus row min). The precipitation group also
    gains ``PREC_sum``, placed before its mean. Groups with no columns are
    skipped. The input frame is not modified.
    """
    result = frame.copy()
    # (output prefix, source columns, also emit a row-wise sum?)
    group_specs = (
        ('QV2M', qv_cols, False),
        ('T2M_MAX', tmax_cols, False),
        ('T2M_MIN', tmin_cols, False),
        ('PREC', pr_cols, True),
    )
    for prefix, source_cols, with_sum in group_specs:
        if not source_cols:
            continue
        seasonal = result[source_cols]
        if with_sum:
            result[f'{prefix}_sum'] = seasonal.sum(axis=1)
        result[f'{prefix}_mean'] = seasonal.mean(axis=1)
        result[f'{prefix}_amp'] = seasonal.max(axis=1) - seasonal.min(axis=1)
    return result

# Engineered copy of the dataset; `df` itself stays untouched.
df_fe = add_seasonal_aggregates(df)

# Toggle: also feed the raw per-season columns to the model (off by default,
# so only their aggregates are used).
KEEP_SEASONALS = False
if KEEP_SEASONALS:
    seasonals = qv_cols + tmax_cols + tmin_cols + pr_cols
else:
    seasonals = []

# Aggregate columns that add_seasonal_aggregates may have produced; only the
# ones that exist in `df_fe` become features.
aggregate_candidates = [
    'QV2M_mean', 'QV2M_amp', 'T2M_MAX_mean', 'T2M_MAX_amp',
    'T2M_MIN_mean', 'T2M_MIN_amp', 'PREC_sum', 'PREC_mean', 'PREC_amp',
]
aggregate_features = [name for name in aggregate_candidates if name in df_fe.columns]

numeric_features = nutrient_cols + other_meteo + aggregate_features + seasonals

categorical_features = [] if not soil_cat_col else [soil_cat_col]

# Nothing to train on: fail loudly instead of fitting an empty design matrix.
if not numeric_features and not categorical_features:
    raise ValueError("No usable features were found after feature engineering.")

# =========================
# 5) Split (with small calibration split)
# =========================
# Feature matrix restricted to the selected columns; labels are cast to str.
X = df_fe[numeric_features + categorical_features].copy()
y = df_fe['label'].astype(str).copy()

# First split: train/test
# 80/20, stratified on the label, fixed seed for reproducibility.
# NOTE(review): stratified splitting is expected to fail when a class is too
# rare to be placed on both sides - confirm the label counts of the dataset.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Second split: train/calibration (prefit calibration is cheaper)
# Another stratified 80/20 split of the training part, i.e. roughly
# 64% train / 16% calibration / 20% test of the full dataset.
X_train, X_cal, y_train, y_cal = train_test_split(
    X_train_full, y_train_full, test_size=0.2, stratify=y_train_full, random_state=42
)

# =========================
# 6) Preprocess + base model pipeline
# =========================
# Numeric columns: fill missing values with the column median.
numeric_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median'))])
# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown='ignore' means a category that was not
# seen during fit does not raise at prediction time.
categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')),
                                          ('ohe', OneHotEncoder(handle_unknown='ignore'))])

# Route each feature list to its transformer; any column not listed is dropped.
preprocess = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ],
    remainder='drop'
)

# Moderately sized forest with subsampling to save RAM/CPU
# These are starting values: n_estimators, max_depth, min_samples_leaf and
# max_samples are also part of the search space in the tuning section below.
rf = RandomForestClassifier(
    n_estimators=400,
    max_depth=16,
    min_samples_leaf=2,
    max_samples=0.8,
    random_state=42,
    n_jobs=-1,
    class_weight='balanced'
)

# Preprocessing and model in one estimator, so raw DataFrames with the
# selected feature columns can be passed to fit/predict directly.
base_pipe = Pipeline(steps=[('prep', preprocess), ('model', rf)])

# =========================
# 7) Hyperparameter tuning (compact)
# =========================
# Shuffled, stratified 3-fold cross-validation with a fixed seed.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Search space. The `model__` prefix addresses the 'model' step of base_pipe.
# scipy's randint(low, high) samples integers with `high` excluded.
param_dist = {
    "model__n_estimators": randint(300, 700),
    "model__max_depth": [10, 14, 18, None],
    "model__min_samples_split": randint(2, 12),
    "model__min_samples_leaf": randint(1, 5),
    "model__max_features": ["sqrt", 0.5],
    "model__max_samples": [0.7, 0.8, 0.9],
    # Pinned to True. NOTE(review): max_samples presumably requires
    # bootstrapping - confirm before widening this entry.
    "model__bootstrap": [True],
}

# 12 sampled configurations x 3 folds, followed by a refit of the best one on
# all of X_train. No `scoring` is passed, so the estimator's default scorer
# is used.
rs = RandomizedSearchCV(
    base_pipe,
    param_distributions=param_dist,
    n_iter=12,            # smaller search
    cv=cv,                # 3-fold CV
    n_jobs=-1,
    verbose=1,
    refit=True,
    random_state=42
)

# Only the training split is used here; calibration and test data stay unseen.
rs.fit(X_train, y_train)
best_pipe = rs.best_estimator_

# =========================
# 8) Probability calibration (prefit: no retraining of base model)
# =========================
# Wrap the already-fitted best pipeline and learn a sigmoid mapping for its
# probabilities.
# NOTE(review): cv='prefit' is reported as deprecated in newer scikit-learn
# releases in favour of wrapping the estimator in FrozenEstimator - confirm
# against the scikit-learn version this notebook runs on.
calibrated_pipe = CalibratedClassifierCV(estimator=best_pipe, method="sigmoid", cv='prefit')
# Fit calibrator on the held-out calibration split
calibrated_pipe.fit(X_cal, y_cal)

# =========================
# 9) Evaluate
# =========================
# Score the calibrated model on the test split, which was used neither for
# the hyperparameter search nor for calibration.
pred = calibrated_pipe.predict(X_test)
acc = accuracy_score(y_test, pred)
print(f"\nHold-out accuracy (calibrated): {acc:.4f}\n")
print("Classification report (calibrated):\n", classification_report(y_test, pred))

# =========================
# 10) Recommender with flexible inputs
# =========================
def _build_feature_row(params: dict) -> pd.DataFrame:
    """Turn a loose ``params`` mapping into a one-row model input frame.

    Every name in the module-level ``numeric_features`` gets the caller's value
    when present and NaN otherwise, so the pipeline's imputer can fill the
    gaps. Keys in ``params`` that are not numeric features are ignored. When a
    soil colour column is configured (``soil_cat_col``), it is taken from
    ``params`` and defaults to ``'Unknown'``. Columns are returned in the order
    ``numeric_features + categorical_features``.
    """
    values = {name: params.get(name, np.nan) for name in numeric_features}
    single_row = pd.DataFrame([values])
    if soil_cat_col:
        single_row[soil_cat_col] = params.get(soil_cat_col, 'Unknown')
    expected_order = numeric_features + categorical_features
    return single_row.reindex(columns=expected_order)

def recommend_crops(params: dict, top_k: int = 3) -> pd.DataFrame:
    """Rank crops for one set of inputs using the calibrated model.

    Args:
        params: Feature values keyed by feature name; missing features are
            left for the pipeline to impute (see ``_build_feature_row``).
        top_k: Number of highest-probability classes to return.

    Returns:
        DataFrame with columns ``crop`` and ``probability``, most probable
        first.
    """
    features = _build_feature_row(params)
    probabilities = calibrated_pipe.predict_proba(features)[0]
    labels = calibrated_pipe.classes_
    # Ascending argsort reversed gives descending probability; keep the head.
    descending = np.argsort(probabilities)[::-1]
    best = descending[:top_k]
    return pd.DataFrame({'crop': labels[best], 'probability': probabilities[best]})

# =========================
# 11) Example call (aggregated inputs)
# =========================
# Hand-written sample input keyed by feature name. Only some of the aggregate
# features are supplied; _build_feature_row leaves the rest as NaN for the
# pipeline's median imputer.
example_agg = {
    'N': 90, 'P': 42, 'K': 43, 'Zn': 1.2, 'S': 10, 'ph': 6.5,
    'QV2M_mean': 0.012, 'T2M_MAX_mean': 32.0, 'T2M_MIN_mean': 20.0, 'PREC_sum': 950.0,
    'WD10M': 180.0, 'GWETTOP': 0.35, 'CLOUD_AMT': 0.45, 'WS2M_RANGE': 6.0, 'PS': 98.0,
    'Soilcolor': 'brown'
}
print("\nTop-5 crops (aggregated inputs):")
# top_k=5 matches the "Top-5" heading printed above.
display(recommend_crops(example_agg, top_k=5))

# =========================
# 12) Save calibrated pipeline
# =========================
# Bundle the fitted model with the feature lists it expects, so a consumer can
# rebuild correctly ordered input rows without re-running this cell.
artifact = {
    'pipeline': calibrated_pipe,
    'numeric_features': numeric_features,
    'categorical_features': categorical_features,
    'seasonal_groups': {
        'QV2M': qv_cols,
        'TMAX': tmax_cols,
        'TMIN': tmin_cols,
        'PREC': pr_cols,
    },
}
joblib.dump(artifact, "crop_recommender_v3_light.joblib")
print("\nSaved model to crop_recommender_v3_light.joblib")


Testing the model

In [None]:
# Interactive prediction with only: pincode, N, P, K
# Works for artifacts saved as {'pipeline', 'numeric_features', 'categorical_features', ...}

import numpy as np
import pandas as pd
import joblib

# UI
import ipywidgets as widgets
from IPython.display import display, clear_output

# Path of the saved model artifact, read again on every prediction.
MODEL_PATH = "crop_recommender_v3_light.joblib"

def _load_pipeline_and_features(path):
    """Load a saved model and return ``(clf, numeric_features, categorical_features)``.

    Two artifact layouts are accepted:

    * a dict with a ``'pipeline'`` entry, optionally accompanied by
      ``'numeric_features'`` and ``'categorical_features'`` lists;
    * a bare estimator exposing ``predict_proba`` (both feature lists are then
      returned empty).

    Raises:
        ValueError: if the loaded object matches neither layout, or the
            dict's pipeline has no ``predict_proba``.

    Note:
        ``joblib.load`` unpickles the file, so only load trusted artifacts.
    """
    loaded = joblib.load(path)

    is_bundle = isinstance(loaded, dict) and 'pipeline' in loaded
    if not is_bundle:
        # Bare estimator: usable as long as it can produce probabilities.
        if not hasattr(loaded, "predict_proba"):
            raise ValueError("Saved file must be a Pipeline or a dict containing 'pipeline' with a fitted Pipeline.")
        return loaded, [], []

    model = loaded['pipeline']
    if not hasattr(model, "predict_proba"):
        raise ValueError("Loaded pipeline has no predict_proba; ensure final step is probabilistic.")
    numeric_names = loaded.get('numeric_features', [])
    categorical_names = loaded.get('categorical_features', [])
    return model, numeric_names, categorical_names

def _build_full_row(N, P, K, num_feats, cat_feats):
    """Create a single-row DataFrame containing all columns the model expects.

    Args:
        N, P, K: Nutrient readings; converted with ``float``.
        num_feats: Names of the numeric training features, or empty/None when
            unknown.
        cat_feats: Names of the categorical training features, or empty/None.

    Returns:
        One-row DataFrame ordered as numeric columns followed by categorical
        columns. When ``num_feats`` is known, every numeric feature is NaN
        (left for the pipeline's imputer) except those of N/P/K that are
        training features. When it is unknown, the numeric columns are
        exactly ``N``, ``P``, ``K``. Every categorical column holds the
        placeholder ``"Unknown"``.
    """
    # Accept any iterable (list, tuple, pandas Index) as well as None.
    num_feats = list(num_feats) if num_feats is not None else []
    cat_feats = list(cat_feats) if cat_feats is not None else []
    npk = {"N": N, "P": P, "K": K}

    if num_feats:
        row = {name: np.nan for name in num_feats}
        row.update({key: float(val) for key, val in npk.items() if key in row})
        numeric_cols = num_feats
    else:
        # Numeric feature list unknown: N/P/K are the only numeric inputs and
        # must survive the final reindex even when only cat_feats is known
        # (previously they were silently dropped in that case).
        row = {key: float(val) for key, val in npk.items()}
        numeric_cols = list(npk)

    X = pd.DataFrame([row])
    # Placeholder category; how it is encoded is up to the loaded pipeline.
    for col in cat_feats:
        X[col] = "Unknown"
    # Enforce a deterministic column order: numeric first, then categorical.
    return X.reindex(columns=numeric_cols + cat_feats)

def _predict_sorted(clf, X, top_k=5):
    """Return the ``top_k`` most probable classes for the first row of ``X``.

    Args:
        clf: Object exposing ``predict_proba`` and, optionally, ``classes_``.
        X: Input passed straight to ``clf.predict_proba``.
        top_k: Number of rows to keep.

    Returns:
        DataFrame with columns ``crop`` and ``probability``, sorted by
        descending probability. When ``clf`` has no ``classes_`` attribute,
        the class indices are used as labels.
    """
    probs = clf.predict_proba(X)[0]
    # Ascending argsort reversed gives descending probability; keep the head.
    ranked = np.argsort(probs)[::-1]
    keep = ranked[:top_k]

    class_labels = getattr(clf, "classes_", None)
    if class_labels is None:
        names = np.arange(len(probs))[keep]
    else:
        names = class_labels[keep]
    return pd.DataFrame({"crop": names, "probability": probs[keep]})

# Widgets
# Input controls for the interactive predictor. The pincode is free text; the
# click handler only echoes it back and does not feed it to the model.
pincode_w = widgets.Text(description="Pincode:", placeholder="e.g., 560001")
N_w = widgets.FloatText(description="N:", value=90.0)
P_w = widgets.FloatText(description="P:", value=42.0)
K_w = widgets.FloatText(description="K:", value=43.0)
# Label fixed: the original text was a mis-decoded non-breaking hyphen
# (mojibake); a plain ASCII hyphen renders correctly everywhere.
topk_w = widgets.IntSlider(description="Top-K", min=1, max=10, value=5)
predict_btn = widgets.Button(description="Predict", button_style="success", icon="play")
# Output area that captures everything the click handler prints or displays.
out = widgets.Output()

def on_predict_clicked(b):
    """Button callback: load the model, predict from N/P/K and show the ranking.

    All output is routed into the ``out`` widget, which is cleared first. The
    model artifact is re-read from ``MODEL_PATH`` on every click. The pincode
    is only echoed back; it is not part of the model input. Any exception is
    reported inside the output area instead of propagating.

    Args:
        b: The clicked button (unused).
    """
    with out:
        clear_output()
        try:
            model, numeric_names, categorical_names = _load_pipeline_and_features(MODEL_PATH)
            nutrients = {
                "N": float(N_w.value),
                "P": float(P_w.value),
                "K": float(K_w.value),
            }
            # Full-width row so the pipeline can select its columns by name.
            features = _build_full_row(
                num_feats=numeric_names, cat_feats=categorical_names, **nutrients
            )
            how_many = int(topk_w.value)
            ranking = _predict_sorted(model, features, top_k=how_many)
            pincode = pincode_w.value.strip()
            print(f"Pincode: {pincode}")
            display(ranking)
        except Exception as err:
            print("Error during prediction:", err)

# Run on_predict_clicked every time the button is pressed.
predict_btn.on_click(on_predict_clicked)

# Layout, top to bottom: pincode row, N/P/K row, Top-K slider with the
# predict button, then the output area the click handler writes into.
form = widgets.VBox([
    widgets.HBox([pincode_w]),
    widgets.HBox([N_w, P_w, K_w]),
    widgets.HBox([topk_w, predict_btn]),
    out
])
display(form)
