## HRES PhD Framework Experimentation Notebook
**Author:** Md Shameem Hossain
**Purpose:** This notebook is used for ad-hoc analysis, scenario testing, and logging experimental runs of the HRES Decision Engine to MLflow. It also demonstrates how to package the decision logic as a versioned model in the MLflow Model Registry.

In [None]:
import pandas as pd
import mlflow
import sys
import os
import json

# Add the src directory to the Python path. This is crucial for importing custom modules.
# The path '/home/jovyan/src' is specific to the Jupyter container's environment.
sys.path.append('/home/jovyan/src')

# Import the custom modules after setting the path
from MCDA_model import HRES_Decision_Engine
from HRES_ML_Model import HRESMLPredictor

# --- 1. Load Resources ---
DATA_PATH = "/home/jovyan/src/HRES_Dataset.csv"
try:
    hres_df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    print(f"❌ ERROR: Dataset file not found at {DATA_PATH}. Please run the generator first.")
    # Fall back to an empty frame so that later cells can test `hres_df.empty`
    # instead of failing on an undefined name.
    hres_df = pd.DataFrame()
else:
    print(f"✅ Successfully loaded dataset from {DATA_PATH} with {len(hres_df)} configurations.")

# The engine is only built when there is data to build it from.
if not hres_df.empty:
    decision_engine = HRES_Decision_Engine(hres_df)
    print("✅ HRES Decision Engine initialized.")

# --- 2. MLflow Setup ---
# This URI points to the mlflow service defined in docker-compose.yml
MLFLOW_TRACKING_URI = "http://hres_mlflow:5000"
EXPERIMENT_NAME = "HRES_Decision_Scenarios_PhD"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

print(f"✅ MLflow connection successful. Using experiment '{EXPERIMENT_NAME}'.")

### Run and Log Benchmark Scenarios
Here, we define several test scenarios with different user priorities. We then run each one through the full decision pipeline and log all inputs, outputs, and metadata to MLflow for tracking and comparison.

In [None]:
# Benchmark scenarios, keyed by the MLflow run name they are logged under.
# Each entry holds the keyword arguments that are unpacked into
# `decision_engine.run_full_pipeline(**params)` when the scenario is executed.
scenarios = dict(
    University_High_ESG=dict(
        scenario_name="University_Campus",
        annual_demand_kwh=3_000_000,
        user_grid_dependency_pct=15,
        esg_weights=dict(environment=0.5, social=0.4, governance=0.1, cost=0.0),
    ),
    Office_Cost_Focused=dict(
        scenario_name="Small_Office",
        annual_demand_kwh=250_000,
        user_grid_dependency_pct=40,
        esg_weights=dict(environment=0.1, social=0.1, governance=0.0, cost=0.8),
    ),
    Hospital_Resilience_Focused=dict(
        scenario_name="Hospital",
        annual_demand_kwh=1_500_000,
        user_grid_dependency_pct=10,
        esg_weights=dict(environment=0.2, social=0.6, governance=0.1, cost=0.1),
    ),
    DataCenter_LowGrid_BalancedESG=dict(
        scenario_name="Data_Center",
        annual_demand_kwh=10_000_000,
        user_grid_dependency_pct=5,
        esg_weights=dict(environment=0.3, social=0.3, governance=0.2, cost=0.2),
    ),
)

# numpy is only imported in a later cell of this notebook, so bring it into
# scope here; otherwise the encoder below raises NameError the first time it
# is asked to serialise a numpy value.
import numpy as np


class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy scalars and arrays to native Python types."""

    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


if not hres_df.empty:
    for run_name, params in scenarios.items():
        with mlflow.start_run(run_name=run_name) as run:
            print(f"--- Running scenario: {run_name} ---")

            # Log input parameters for the run
            mlflow.log_params(params)

            # Execute the decision pipeline
            best_solution, status, _, _, _ = decision_engine.run_full_pipeline(**params)
            mlflow.set_tag("status", status)

            if best_solution is None:
                print(f"⚠️ No solution found for this scenario. Status: {status}")
                continue

            # Convert once and reuse for both the metrics and the artifact.
            solution_dict = best_solution.to_dict()

            # Only numeric entries can be logged as metrics. numpy scalar types
            # are accepted explicitly because numpy integers are not `int`
            # instances and would otherwise be silently dropped.
            metrics_to_log = {
                key: float(value)
                for key, value in solution_dict.items()
                if isinstance(value, (int, float, np.integer, np.floating))
            }
            mlflow.log_metrics(metrics_to_log)

            # Save the full detailed solution as a JSON artifact
            solution_path = f"/tmp/best_solution_{run_name}.json"  # Use /tmp as it's a writable directory
            with open(solution_path, "w", encoding="utf-8") as f:
                json.dump(solution_dict, f, indent=2, cls=NpEncoder)

            mlflow.log_artifact(solution_path)
            print(f"✅ Success. Logged recommendation to MLflow. Run ID: {run.info.run_id}")

    print("\n🎉 All scenarios have been run and logged to MLflow.")
else:
    print("Skipping scenario logging as dataset is not loaded.")

### Train and Register ML Prediction Models
This section trains the Random Forest models on the generated dataset and registers them with MLflow. It is the same logic that the Airflow DAG runs; running it here is useful for interactive development and tuning.

In [None]:
# Train and log the ML prediction models only when a dataset was loaded.
if hres_df.empty:
    print("❌ Skipping ML model training: Dataset is empty.")
else:
    predictor = HRESMLPredictor(model_name_suffix="_V1")
    predictor.train_and_log_models(DATA_PATH)

### Package and Register the Decision Logic (MCDA Model)
To meet MLOps requirements for governance and reproducibility, we package our `HRES_Decision_Engine` class into a standard `mlflow.pyfunc` format. This allows us to register it as a versioned model in the MLflow Model Registry, creating an immutable, auditable link between a model version and the results it produces.

In [None]:
import mlflow.pyfunc
import json
import numpy as np # Import numpy for the custom JSON encoder

# Name passed as `registered_model_name` when the wrapped decision engine is
# logged, i.e. the name it is registered under in the MLflow Model Registry.
MODEL_NAME_MCDA = "HRES-MCDA-Decision-Engine"

# Create a wrapper class to make our custom logic compatible with the mlflow.pyfunc standard.
class HRES_Engine_Wrapper(mlflow.pyfunc.PythonModel):
    """Expose `HRES_Decision_Engine` through the `mlflow.pyfunc` model interface.

    `load_context` builds the engine from the dataset CSV; `predict` runs the
    engine's full pipeline once per row of the input DataFrame.
    """

    def load_context(self, context):
        """Read the dataset and initialise the decision engine.

        This method runs once when the model is loaded for inference. The
        dataset is read from a fixed path inside the container rather than from
        `context`, so that file must exist wherever the model is loaded.
        """
        DATA_PATH_IN_CONTAINER = "/home/jovyan/src/HRES_Dataset.csv"

        # Make the project sources importable before importing MCDA_model.
        if '/home/jovyan/src' not in sys.path:
            sys.path.append('/home/jovyan/src')

        hres_df_loaded = pd.read_csv(DATA_PATH_IN_CONTAINER)
        from MCDA_model import HRES_Decision_Engine
        self.engine = HRES_Decision_Engine(hres_df_loaded)
        print("✅ HRES_Engine_Wrapper loaded context and initialized Decision Engine.")

    @staticmethod
    def _parse_esg_weights(raw_weights):
        """Return the ESG weights, decoding them first when given as JSON text."""
        if isinstance(raw_weights, (str, bytes, bytearray)):
            return json.loads(raw_weights)
        # Already decoded (e.g. a dict supplied directly): pass through as-is.
        return raw_weights

    def predict(self, context, model_input):
        """Run the decision pipeline for every row of `model_input`.

        Args:
            context: MLflow model context (unused here).
            model_input: pandas DataFrame with the columns `scenario_name`,
                `annual_demand_kwh`, `user_grid_dependency_pct` and
                `esg_weights`. `esg_weights` may be a JSON string or an
                already-decoded mapping.

        Returns:
            A pandas DataFrame with one row per input row. Rows with a solution
            contain the solution's fields plus `status`; rows without one
            contain `status` and `recommendation` set to None.
        """
        results = []
        for _, row in model_input.iterrows():
            esg_weights = self._parse_esg_weights(row['esg_weights'])

            # Keyword arguments mirror the names used when the pipeline is
            # called with `**params` in the scenario cell, so the call does not
            # depend on the positional order of the engine's signature.
            solution, status, _, _, _ = self.engine.run_full_pipeline(
                scenario_name=row['scenario_name'],
                annual_demand_kwh=row['annual_demand_kwh'],
                user_grid_dependency_pct=row['user_grid_dependency_pct'],
                esg_weights=esg_weights,
            )

            if solution is not None:
                res_dict = solution.to_dict()
                res_dict['status'] = status
                results.append(res_dict)
            else:
                results.append({'status': status, 'recommendation': None})

        return pd.DataFrame(results)

# A custom JSON encoder is needed to handle potential numpy types in the solution series
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy values to native Python equivalents.

    Handles numpy integer, floating and boolean scalars as well as arrays;
    anything else is delegated to `json.JSONEncoder`, which raises TypeError
    for unsupported objects.
    """

    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # numpy booleans are not Python bools, so json rejects them unless
        # they are converted explicitly.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

# Log and register the wrapped MCDA model logic
if not hres_df.empty:
    # Environment recorded alongside the model.
    # - The Python version is taken from the running interpreter because the
    #   wrapper class is pickled by this interpreter and should be unpickled
    #   by a matching one.
    # - mlflow and cloudpickle are listed under pip so that the environment
    #   restored at load time can actually import and unpickle the pyfunc model.
    mcda_conda_env = {
        'channels': ['conda-forge'],
        'dependencies': [
            f'python={sys.version_info.major}.{sys.version_info.minor}',
            'pip',
            f'pandas=={pd.__version__}',
            'scikit-learn',
            'numpy',
            'tqdm',
            {'pip': [f'mlflow=={mlflow.__version__}', 'cloudpickle']},
        ],
        'name': 'hres_mcda_env'
    }

    with mlflow.start_run(run_name="Package_and_Register_MCDA_Logic") as run:
        # Provide an example of the input DataFrame for the model signature
        input_example = pd.DataFrame([{
            "scenario_name": "University_Campus",
            "annual_demand_kwh": 3000000,
            "user_grid_dependency_pct": 15.0,
            # IMPORTANT: The esg_weights must be a JSON string in the input
            "esg_weights": json.dumps({"environment": 0.5, "social": 0.4, "governance": 0.1, "cost": 0.0})
        }])

        mlflow.pyfunc.log_model(
            artifact_path="mcda_model_logic",
            python_model=HRES_Engine_Wrapper(),
            input_example=input_example,
            registered_model_name=MODEL_NAME_MCDA,
            conda_env=mcda_conda_env,
        )
    print(f"✅ Decision logic has been packaged and registered as model '{MODEL_NAME_MCDA}' in the MLflow Model Registry.")
else:
    print("❌ Skipping MCDA model packaging: Dataset is empty.")