## HRES PhD Framework Experimentation Notebook
**Author:** Md Shameem Hossain
**Purpose:** This notebook is used for ad-hoc analysis, scenario testing, and logging experimental runs of the HRES Decision Engine to MLflow. It also demonstrates how to package the decision logic as a versioned model in the MLflow Model Registry.

In [None]:
import pandas as pd
import mlflow
import sys
import os
import json # Added for json.dumps in MCDA model packaging

# Add the src directory to the Python path so this notebook can import our custom modules.
# This path is specific to the Jupyter container's WORKDIR and volume mounts.
sys.path.append('/home/jovyan/src')

# Import the definitive, class-based decision engine and ML predictor
from MCDA_model import HRES_Decision_Engine
from HRES_ML_Model import HRESMLPredictor

# --- 1. Load Resources ---
# The comprehensive dataset generated by our simulation engine lives at this path.
DATA_PATH = "/home/jovyan/src/HRES_Dataset.csv"

try:
    hres_df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    # Fall back to an empty DataFrame so `hres_df` is always defined and
    # later cells can check `hres_df.empty` instead of failing on a missing name.
    hres_df = pd.DataFrame()
    print(f"❌ ERROR: Dataset file not found at {DATA_PATH}. Please run HRES_Dataset_Generator.py first.")
else:
    print(f"✅ Successfully loaded dataset from {DATA_PATH} with {len(hres_df)} configurations.")

# Build the decision engine on top of the DataFrame loaded above
decision_engine = HRES_Decision_Engine(hres_df)
print("✅ HRES Decision Engine initialized.")

# --- 2. MLflow Setup ---
# Tracking server (the mlflow service defined in our docker-compose.yml) and
# the experiment that collects every run started from this notebook.
MLFLOW_TRACKING_URI = "http://hres_mlflow:5000"
EXPERIMENT_NAME = "HRES_Decision_Scenarios_PhD"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

print(f"✅ MLflow connection successful. Using experiment '{EXPERIMENT_NAME}'.")

### Run and Log Benchmark Scenarios
Here, we define several test scenarios with different user priorities. We then run each one through the full decision pipeline and log all inputs, outputs, and metadata to MLflow for tracking and comparison.

In [None]:
# Benchmark scenarios keyed by MLflow run name. Each entry carries the four
# arguments passed to `decision_engine.run_full_pipeline`; the weights of every
# scenario sum to 1.0.
scenarios = {
    # Campus that prioritises environmental and social outcomes over cost.
    "University_High_ESG": {
        "scenario_name": "University_Campus",
        "annual_demand_kwh": 3_000_000,
        "user_grid_dependency_pct": 15,
        "esg_weights": {
            "environment": 0.5,
            "social": 0.4,
            "governance": 0.1,
            "cost": 0.0,
        },
    },
    # Small office where cost dominates the decision.
    "Office_Cost_Focused": {
        "scenario_name": "Small_Office",
        "annual_demand_kwh": 250_000,
        "user_grid_dependency_pct": 40,
        "esg_weights": {
            "environment": 0.1,
            "social": 0.1,
            "governance": 0.0,
            "cost": 0.8,
        },
    },
    # Hospital with the social weight set highest.
    "Hospital_Resilience_Focused": {
        "scenario_name": "Hospital",
        "annual_demand_kwh": 1_500_000,
        "user_grid_dependency_pct": 10,
        "esg_weights": {
            "environment": 0.2,
            "social": 0.6,
            "governance": 0.1,
            "cost": 0.1,
        },
    },
    # Data center with very low grid dependency and balanced weights.
    "DataCenter_LowGrid_BalancedESG": {
        "scenario_name": "Data_Center",
        "annual_demand_kwh": 10_000_000,
        "user_grid_dependency_pct": 5,
        "esg_weights": {
            "environment": 0.3,
            "social": 0.3,
            "governance": 0.2,
            "cost": 0.2,
        },
    },
}

import numbers
import tempfile


def _json_default(value):
    """Encode NumPy scalars/arrays that ``json`` cannot serialize natively.

    Objects exposing ``tolist()`` are converted to builtin Python values;
    anything else is rejected with ``TypeError`` so that unexpected types are
    never silently stringified.
    """
    if hasattr(value, "tolist"):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


if hres_df.empty:
    # Same guard as the training and packaging cells: without data there is
    # nothing meaningful to benchmark or log.
    print("❌ Skipping benchmark scenarios: Dataset is empty.")
else:
    for run_name, params in scenarios.items():
        with mlflow.start_run(run_name=run_name) as run:
            print(f"--- Running scenario: {run_name} ---")
            # Log input parameters (the nested esg_weights dict is stored as its string form)
            mlflow.log_params(params)

            best_solution, status, feasible_df, sorted_df, pareto_df = decision_engine.run_full_pipeline(
                params["scenario_name"],
                params["annual_demand_kwh"],
                params["user_grid_dependency_pct"],
                params["esg_weights"]
            )

            mlflow.set_tag("status", status)

            if best_solution is not None:
                # Log every numeric KPI of the recommended solution as an MLflow metric.
                # Identifier fields are dropped explicitly (tolerating their absence);
                # non-numeric values such as dicts or strings are filtered out below.
                kpis = best_solution.drop(
                    ['scenario_name', 'esg_category', 'annual_demand_kwh'],
                    errors="ignore",
                ).to_dict()
                # numbers.Real also matches NumPy integer/float scalars, which a plain
                # (int, float) check would skip.
                numeric_kpis = {
                    name: float(value)
                    for name, value in kpis.items()
                    if isinstance(value, numbers.Real)
                }
                mlflow.log_metrics(numeric_kpis)

                # Log the full recommendation as an artifact. A temporary directory keeps
                # the artifact file name stable while leaving nothing behind on disk.
                with tempfile.TemporaryDirectory() as tmp_dir:
                    artifact_file = os.path.join(tmp_dir, f"best_solution_{run_name}.json")
                    with open(artifact_file, "w", encoding="utf-8") as f:
                        json.dump(best_solution.to_dict(), f, indent=2, default=_json_default)
                    mlflow.log_artifact(artifact_file)

                print(f"✅ Success. Logged recommendation to MLflow. Run ID: {run.info.run_id}")
            else:
                print(f"⚠️ No solution found for this scenario. Status: {status}")

    print("\n🎉 All scenarios have been run and logged to MLflow.")

### Train and Register ML Prediction Models
This section demonstrates how to train simple ML models (Random Forests) using the generated dataset to predict key HRES outcomes. These models are logged and registered with MLflow, enabling faster inference for the 'ML Fast Predictor' UI tab.

In [None]:
# Training reads the dataset from DATA_PATH, so skip it when nothing was loaded.
if hres_df.empty:
    print("❌ Skipping ML model training: Dataset is empty.")
else:
    predictor = HRESMLPredictor(model_name_suffix="_V1")
    predictor.train_and_log_models(DATA_PATH)

### Package and Register the Decision Logic (MCDA Model)
To meet MLOps requirements for governance and reproducibility, we package our `HRES_Decision_Engine` class into a standard `mlflow.pyfunc` format. This allows us to register it as a versioned model in the MLflow Model Registry, creating an immutable, auditable link between a model version and the results it produces.

In [None]:
import mlflow.pyfunc
import json

# Name under which the wrapped decision engine is registered in the MLflow Model Registry.
MODEL_NAME_MCDA = "HRES-MCDA-Decision-Engine"

# Create a wrapper class to make our custom logic compatible with the mlflow.pyfunc standard.
class HRES_Engine_Wrapper(mlflow.pyfunc.PythonModel):
    """Expose ``HRES_Decision_Engine.run_full_pipeline`` through the ``mlflow.pyfunc`` interface.

    Each input row describes one decision request; each output row holds the
    recommended solution (or only a status when no solution was found).
    """

    def load_context(self, context):
        """Build the decision engine once, when the model is loaded.

        Args:
            context: Context object supplied by MLflow; not used here.
        """
        # NOTE(review): the dataset is read from a fixed container path rather than
        # from an artifact logged with the model, so a registered version only
        # reproduces its results while this file stays unchanged.
        DATA_PATH_IN_CONTAINER = "/home/jovyan/src/HRES_Dataset.csv"

        # Ensure src is in path for MCDA_model import
        if '/home/jovyan/src' not in sys.path:
            sys.path.append('/home/jovyan/src')

        # The MCDA_model relies on HRES_Dataset_Generator constants, so we ensure generator is importable too
        import HRES_Dataset_Generator  # noqa: F401 - imported only for its side effects

        from MCDA_model import HRES_Decision_Engine

        hres_df_loaded = pd.read_csv(DATA_PATH_IN_CONTAINER)
        self.engine = HRES_Decision_Engine(hres_df_loaded)
        print("✅ HRES_Engine_Wrapper loaded context and initialized Decision Engine.")

    @staticmethod
    def _parse_esg_weights(raw_weights):
        """Return the ESG weights as a dict.

        Accepts either JSON text (the format used by the logged input example)
        or an already-decoded mapping, so callers are not forced to serialize.
        """
        if isinstance(raw_weights, (str, bytes, bytearray)):
            return json.loads(raw_weights)
        return dict(raw_weights)

    def predict(self, context, model_input):
        """Run the full decision pipeline for every row of ``model_input``.

        Args:
            context: Context object supplied by MLflow; not used here.
            model_input: DataFrame with the columns ``scenario_name``,
                ``annual_demand_kwh``, ``user_grid_dependency_pct`` and
                ``esg_weights`` (JSON string or dict).

        Returns:
            A DataFrame with one row per input row. Rows with a solution hold
            the solution's fields plus ``status``; rows without one hold only
            ``status`` and ``recommendation`` (``None``).
        """
        results = []
        for _, row in model_input.iterrows():
            solution, status, _, _, _ = self.engine.run_full_pipeline(
                row['scenario_name'],
                row['annual_demand_kwh'],
                row['user_grid_dependency_pct'],
                self._parse_esg_weights(row['esg_weights'])
            )
            if solution is not None:
                res_dict = solution.to_dict()
                res_dict['status'] = status
                results.append(res_dict)
            else:
                results.append({'status': status, 'recommendation': None})
        return pd.DataFrame(results)

# Log and register the wrapped MCDA model logic
if not hres_df.empty:
    with mlflow.start_run(run_name="Package_and_Register_MCDA_Logic") as run:
        # Example input for model signature. esg_weights travels as a JSON string
        # so that the column holds a plain string value.
        input_example = pd.DataFrame([{
            "scenario_name": "University_Campus",
            "annual_demand_kwh": 3000000,
            "user_grid_dependency_pct": 15.0,
            "esg_weights": json.dumps({"environment": 0.5, "social": 0.4, "governance": 0.1, "cost": 0.0})
        }])

        # Environment restored when the model is loaded or served elsewhere.
        # A custom conda_env replaces MLflow's default one, so it must itself list
        # mlflow and cloudpickle; without them the restored environment cannot
        # load a pyfunc model.
        # NOTE(review): 'python=3.9' is pinned by hand - confirm it matches the
        # interpreter that pickles HRES_Engine_Wrapper in this notebook.
        mcda_conda_env = {
            'channels': ['conda-forge'],
            'dependencies': [
                'python=3.9',
                'pip',
                'pandas',
                'numpy',
                'scikit-learn',
                'tqdm',
                'psycopg2-binary',
                'sqlalchemy',
                'alembic',
                'pyyaml',
                'flask',  # Flask is needed for API, ensure it's in env
                {'pip': [f'mlflow=={mlflow.__version__}', 'cloudpickle']},
            ],
            'name': 'hres_mcda_env'
        }

        mlflow.pyfunc.log_model(
            artifact_path="mcda_model_logic",
            python_model=HRES_Engine_Wrapper(),
            input_example=input_example,
            registered_model_name=MODEL_NAME_MCDA,
            conda_env=mcda_conda_env
        )

    print(f"✅ Decision logic has been packaged and registered as model '{MODEL_NAME_MCDA}' in the MLflow Model Registry.")
else:
    print("❌ Skipping MCDA model packaging: Dataset is empty.")