<a href="https://colab.research.google.com/github/norman-AI-2025/hackathon-2025/blob/main/fusion_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from typing import Callable

# The 'compute_text_score' function is passed as an argument from run_pipeline.ipynb
def compute_fusion_risk(
    df: pd.DataFrame,
    compute_text_score: Callable[[str], float],
    numeric_col: str = 'numeric_score',
    text_col: str = 'notes',
    alpha: float = 0.5  # Weight for numeric score
) -> pd.DataFrame:
    """
    Fuse numeric and text risk scores into a single final risk score (0-100).

    The final score is the weighted average
    ``alpha * numeric + (1 - alpha) * text``, where both inputs are first
    clipped to the range [0, 100].

    Args:
        df: Input data. It is not modified; a copy is returned.
        compute_text_score: Callable applied to every value of ``text_col``
            to produce that row's text score.
        numeric_col: Name of the column holding the numeric score.
        text_col: Name of the column passed to ``compute_text_score``.
        alpha: Weight of the numeric score. Values outside [0, 1] are
            clamped into that range.

    Returns:
        A copy of ``df`` with ``numeric_col`` clipped to [0, 100] and three
        added columns: ``text_score``, ``risk_score`` and ``risk_label``
        ("High Risk" for scores >= 70, "Medium Risk" for scores >= 30,
        "Low Risk" below that, and "Unknown" when the score is missing).

    Raises:
        KeyError: If ``text_col`` is not a column of ``df``.
        ValueError: If ``numeric_col`` is not a column of ``df``.
    """
    # Validate both columns before doing any work: text scoring runs once per
    # row and may be slow, so a misconfigured call should fail immediately.
    if text_col not in df.columns:
        raise KeyError(f"Column '{text_col}' not found in DataFrame.")
    if numeric_col not in df.columns:
        raise ValueError(
            f"Column '{numeric_col}' not found in DataFrame. "
            "Run the numeric model first to calculate this score."
        )

    df = df.copy()

    # 1) Compute text_score for each row using the function passed from run_pipeline
    print("  -> Calculating text scores...")
    df["text_score"] = df[text_col].apply(compute_text_score)

    # 2) Fusion: numeric + text
    alpha = max(0.0, min(1.0, alpha))

    # Ensure scores are within [0,100] before fusion
    df[numeric_col] = df[numeric_col].clip(0, 100)
    df["text_score"] = df["text_score"].clip(0, 100)

    # Weighted Average for Final Risk Score
    df["risk_score"] = (alpha * df[numeric_col]) + ((1 - alpha) * df["text_score"])

    # 3) Categorize the final risk score
    def categorize_risk(score):
        # A missing score fails every ">=" comparison; without this guard it
        # would fall through to "Low Risk" and hide rows that were never scored.
        if pd.isna(score):
            return "Unknown"
        if score >= 70:
            return "High Risk"
        if score >= 30:
            return "Medium Risk"
        return "Low Risk"

    df["risk_label"] = df["risk_score"].apply(categorize_risk)
    print("  -> Fusion complete and risk labels assigned.")

    return df