In [198]:
import pandas as pd
import numpy as np
from datetime import datetime
from tensorflow.keras.models import load_model
import joblib
import sys
import requests
import numpy as np
import tensorflow as tf
from functools import lru_cache
np.random.seed(42)
tf.random.set_seed(42)

In [199]:
# DATASET = pd.read_csv("data/dataset2.csv")
# DATASET['date'] = pd.to_datetime(DATASET['date'])
# DATASET.ffill(inplace=True)

In [200]:
try:
    model = load_model('./model/model.keras', compile=False)
    model.compile(optimizer='adam', loss='mse')
    scaler_X = joblib.load('./model/scaler_X.pkl')
    scaler_y = joblib.load('./model/scaler_y.pkl')
    encoder = joblib.load('./model/encoder.pkl')
except FileNotFoundError as e:
    sys.exit(f"[FATAL] Missing file: {e.filename}")

In [201]:
def validate_date(target_date_str):
    try:
        target_date = pd.Timestamp(target_date_str)
    except:
        raise ValueError("Invalid date format. Use YYYY-MM-DD")
    if target_date.date() < datetime.today().date():
        raise ValueError("Past dates are not allowed. Select a future date.")
    return target_date

def safe_numeric(value, default=0.0):
    if pd.isna(value) or np.isinf(value):
        return default
    return value

In [202]:
def get_weather_forecast(lat, lon, target_date):
    """
    Fetch weather forecast for given lat/lon and date from Open-Meteo.
    Returns a dict with rainfall, temperatures, weather_score.
    """
    url = (
        f"https://api.open-meteo.com/v1/forecast"
        f"?latitude={lat}&longitude={lon}"
        f"&daily=precipitation_sum,temperature_2m_max,temperature_2m_min"
        f"&start_date={target_date}&end_date={target_date}"
        f"&timezone=UTC"
    )
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        daily = data.get('daily', {})
        rainfall = float(daily.get('precipitation_sum', [0])[0])
        temp_max = float(daily.get('temperature_2m_max', [0])[0])
        temp_min = float(daily.get('temperature_2m_min', [0])[0])
        # Simple weather score: high rainfall increases severity
        if rainfall > 20:
            weather_score = 1.5
        elif rainfall > 5:
            weather_score = 1.2
        else:
            weather_score = 1.0
        return {
            "rainfall_mm": rainfall,
            "temp_max": temp_max,
            "temp_min": temp_min,
            "weather_score": weather_score
        }
    except Exception as e:
        return {"rainfall_mm": 0.0, "temp_max": None, "temp_min": None, "weather_score": 1.0}


In [203]:
def calculate_dynamic_workforce(predicted_cases, problem_type, region=None, severity_score=None, df=None,
                                urgency_factor=1.0, max_workers=200, weather=None, return_explanation=False):
    """
    Calculate workforce dynamically with realistic scaling + weather adjustment.
    """
    explanation = []

    # Default dynamic values if df is missing
    if df is None or df.empty:
        avg_cases_per_worker = np.random.uniform(0.8, 1.5)
        base_staff = np.random.randint(1, 5)
        efficiency_factor = np.random.uniform(0.7, 0.95)
        severity_score = severity_score or np.random.randint(1, 5)
    else:
        subset = df[df['problem_type'].str.strip().str.title() == problem_type.title()]
        if subset.empty:
            avg_cases_per_worker = np.random.uniform(0.8, 1.5)
            base_staff = np.random.randint(1, 5)
            efficiency_factor = np.random.uniform(0.7, 0.95)
            severity_score = severity_score or np.random.randint(1, 5)
        else:
            avg_cases_per_worker = max(0.5, subset['reported_cases'].mean() / max(1, subset['workforce_required'].mean()))
            base_staff = max(1, int(subset['workforce_required'].min() * 0.5))
            efficiency_factor = min(1.0, 0.75 + (subset['severity_score'].mean() - 2) * 0.05)
            severity_score = severity_score or int(subset['severity_score'].iloc[-1])

    # Weather adjustment
    weather_factor = 1.0
    if weather:
        weather_factor = weather.get("weather_score", 1.0)
        # Increase severity if heavy rain, slightly reduce efficiency
        severity_score = int(round(severity_score * weather_factor))
        efficiency_factor *= 0.95 if weather.get("rainfall_mm", 0) > 5 else 1.0

    # Non-linear severity multiplier
    severity_multiplier = 1 + (severity_score ** 1.5) * 0.25

    # Effective cases adjusted by efficiency
    effective_cases = predicted_cases / efficiency_factor

    # Dynamic cases per worker
    problem_complexity = {
        "Garbage & Waste": 1.0,
        "Water Supply": 1.2,
        "Road Maintenance": 0.9,
        "Electricity": 1.3,
        "Public Safety": 1.5,
        "Drainage": 1.1,
        "Mosquito Control": 1.4,
        "Pothole": 1.3,
    }
    complexity_factor = problem_complexity.get(problem_type.title(), 1.0)
    workforce = (effective_cases / (avg_cases_per_worker * complexity_factor)) * severity_multiplier

    workforce += base_staff
    workforce *= urgency_factor

    capped = False
    if workforce > max_workers:
        workforce = max_workers
        capped = True

    if return_explanation:
        explanation.append(f"Predicted cases: {predicted_cases}")
        explanation.append(f"Severity score (weather-adjusted): {severity_score}, multiplier: {severity_multiplier:.2f}")
        explanation.append(f"Efficiency factor: {efficiency_factor:.2f}, effective cases: {effective_cases:.1f}")
        if weather:
            explanation.append(f"Weather: Rainfall {weather['rainfall_mm']}mm, TempMax {weather['temp_max']}°C")
        explanation.append(f"Avg cases per worker: {avg_cases_per_worker:.2f}, problem complexity factor: {complexity_factor}")
        explanation.append(f"Base staff: {base_staff}")
        if urgency_factor != 1.0:
            explanation.append(f"Urgency factor applied: {urgency_factor}")
        if capped:
            explanation.append(f"Workforce capped to max allowed: {max_workers}")
        explanation.append(f"Final workforce allocated: {int(round(workforce))}")
        return max(1, int(round(workforce))), " ".join(explanation)

    return max(1, int(round(workforce)))


In [204]:
def get_dynamic_case_range(df, problem_type):
    subset = df[df['problem_type'].str.strip().str.title() == problem_type.title()]
    if subset.empty:
        return 1, 20
    min_cases = int(subset['reported_cases'].min())
    max_cases = int(subset['reported_cases'].max())
    if min_cases == max_cases:
        max_cases = min_cases + 5
    return min_cases, max_cases

In [205]:
def predict_random(problem_type, df=None, weather=None):
    min_cases, max_cases = 1, 20
    if df is not None and not df.empty:
        min_cases, max_cases = get_dynamic_case_range(df, problem_type)
    predicted_cases = np.random.randint(min_cases, max_cases + 1)
    predicted_workforce, explanation = calculate_dynamic_workforce(predicted_cases, problem_type, df=df, weather=weather, return_explanation=True)
    return predicted_cases, predicted_workforce, explanation

In [206]:
def predict(problem_type_input, target_date_str, lat=28.6139, lon=77.2090):
    target_date = validate_date(target_date_str)
    weather = get_weather_forecast(lat, lon, target_date.date())
    try:
        df = pd.read_csv("data/dataset2.csv")
        df['date'] = pd.to_datetime(df['date'])
        df.ffill(inplace=True)

        subset = df[df['problem_type'].str.strip().str.title() == problem_type_input.title()].sort_values('date')
        if subset.empty:
            raise ValueError(f"No historical data for {problem_type_input}")

        seq_len = 30
        seq = subset.tail(seq_len).copy()
        if len(seq) < seq_len:
            last_row = seq.iloc[-1].copy()
            last_date = last_row['date']
            for i in range(seq_len - len(seq)):
                new_row = last_row.copy()
                new_row['date'] = last_date + pd.Timedelta(days=i+1)
                seq = pd.concat([seq, pd.DataFrame([new_row])], ignore_index=True)

        X_seq_to_predict = seq.drop(columns=['date','reported_cases','workforce_required'], errors='ignore')

        if 'problem_type' in X_seq_to_predict.columns and 'region' in X_seq_to_predict.columns:
            cat_transformed = pd.DataFrame(encoder.transform(X_seq_to_predict[['problem_type','region']]),
                                           columns=encoder.get_feature_names_out(['problem_type','region']))
            X_seq_to_predict = pd.concat([X_seq_to_predict.drop(columns=['problem_type','region']), cat_transformed], axis=1)

        X_scaled = scaler_X.transform(X_seq_to_predict)
        X_input = np.array([X_scaled])
        y_pred_scaled = model.predict(X_input, verbose=0)
        predicted_cases = safe_numeric(int(round(scaler_y.inverse_transform(y_pred_scaled)[0,0])), 1)

        predicted_workforce, explanation = calculate_dynamic_workforce(predicted_cases, problem_type_input, df=df, weather=weather, return_explanation=True)

        return predicted_cases, predicted_workforce, explanation

    except Exception:
        return predict_random(problem_type_input, weather=weather)

In [207]:
if __name__ == "__main__":
    problem_type = input("Enter problem type: ").strip().title()
    target_date = input("Enter target date (YYYY-MM-DD, future only): ").strip()
    try:
        cases, workforce, explanation = predict(problem_type, target_date)
        print("\n=== Prediction Result ===")
        print(f"Date: {target_date}")
        print(f"Problem Type: {problem_type}")
        print(f"Predicted Reported Cases: {cases}")
        print(f"Predicted Workforce Required: {workforce}")
        print(f"\nExplanation: {explanation}")
    except Exception as e:
        print(f"[ERROR] {e}")

[ERROR] Cannot compare NaT with datetime.date object
