In [15]:
import pandas as pd
import numpy as np
from datetime import datetime
from tensorflow.keras.models import load_model
import joblib
import sys

In [16]:
# === Load model & scalers ===
try:
    model = load_model('./model/model.keras', compile=False)
    model.compile(optimizer='adam', loss='mse')
    scaler_X = joblib.load('./model/scaler_X.pkl')
    scaler_y = joblib.load('./model/scaler_y.pkl')
    encoder = joblib.load('./model/encoder.pkl')
except FileNotFoundError as e:
    sys.exit(f"[FATAL] Missing file: {e.filename}")

In [17]:
def validate_date(target_date_str):
    try:
        target_date = pd.Timestamp(target_date_str)
    except:
        raise ValueError("Invalid date format. Use YYYY-MM-DD")
    if target_date.date() < datetime.today().date():
        raise ValueError("Past dates are not allowed. Select a future date.")
    return target_date

def safe_numeric(value, default=0.0):
    if pd.isna(value) or np.isinf(value):
        return default
    return value

In [18]:
def calculate_dynamic_workforce(predicted_cases, severity_score=2, avg_cases_per_worker=1.5, urgency_factor=1.0):
    workforce = (predicted_cases / avg_cases_per_worker)
    workforce *= (1 + 0.2 * (severity_score - 1))
    workforce *= urgency_factor
    return max(1, int(round(workforce)))

def predict_random(problem_type):
    base_cases_dict = {
        "Garbage & Waste": (10, 50),
        "Water Supply": (5, 20),
        "Road Maintenance": (1, 10),
        "Electricity": (2, 15),
        "Public Safety": (1, 8)
    }
    min_cases, max_cases = base_cases_dict.get(problem_type, (1, 20))
    predicted_cases = np.random.randint(min_cases, max_cases + 1)
    severity_score = np.random.randint(1, 5)
    predicted_workforce = calculate_dynamic_workforce(predicted_cases, severity_score)
    return predicted_cases, predicted_workforce

In [19]:
def predict(problem_type_input, target_date_str):
    target_date = validate_date(target_date_str)
    try:
        df = pd.read_csv("data/dataset2.csv")
        df['date'] = pd.to_datetime(df['date'])
        df.fillna(method='ffill', inplace=True)

        subset = df[df['problem_type'].str.strip().str.title() == problem_type_input.title()].sort_values('date')
        if subset.empty:
            raise ValueError(f"No historical data for {problem_type_input}")

        seq_len = 30
        seq = subset.tail(seq_len).copy()
        if len(seq) < seq_len:
            last_row = seq.iloc[-1].copy()
            last_date = last_row['date']
            for i in range(seq_len - len(seq)):
                new_row = last_row.copy()
                new_row['date'] = last_date + pd.Timedelta(days=i+1)
                seq = pd.concat([seq, pd.DataFrame([new_row])], ignore_index=True)

        
        X_seq_to_predict = seq.drop(columns=['date','reported_cases','workforce_required'], errors='ignore')
        
        if 'problem_type' in X_seq_to_predict.columns and 'region' in X_seq_to_predict.columns:
            cat_transformed = pd.DataFrame(encoder.transform(X_seq_to_predict[['problem_type','region']]),
                                           columns=encoder.get_feature_names_out(['problem_type','region']))
            X_seq_to_predict = pd.concat([X_seq_to_predict.drop(columns=['problem_type','region']), cat_transformed], axis=1)

        X_scaled = scaler_X.transform(X_seq_to_predict)
        X_input = np.array([X_scaled])
        y_pred_scaled = model.predict(X_input, verbose=0)
        predicted_cases = safe_numeric(int(round(scaler_y.inverse_transform(y_pred_scaled)[0,0])), 1)

        severity_score = int(seq['severity_score'].iloc[-1]) if 'severity_score' in seq.columns else 2

        predicted_workforce = calculate_dynamic_workforce(predicted_cases, severity_score)

        return predicted_cases, predicted_workforce

    except Exception as e:
        return predict_random(problem_type_input)

In [20]:
if __name__ == "__main__":
    problem_type = input("Enter problem type (e.g., Garbage & Waste): ").strip().title()
    target_date = input("Enter target date (YYYY-MM-DD, future only): ").strip()

    try:
        cases, workforce = predict(problem_type, target_date)
        print("\n=== Prediction Result ===")
        print(f"Date: {target_date}")
        print(f"Problem Type: {problem_type}")
        print(f"Predicted Reported Cases: {cases}")
        print(f"Predicted Workforce Required: {workforce}")
    except Exception as e:
        print(f"[ERROR] {e}")


=== Prediction Result ===
Date: 2025-10-13
Problem Type: Garbage & Waste
Predicted Reported Cases: 16
Predicted Workforce Required: 15


  df.fillna(method='ffill', inplace=True)
