<a href="https://colab.research.google.com/github/norman-AI-2025/hackathon-2025/blob/main/fusion_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# fusion_model.py

from typing import List
import pandas as pd
from text_analysis import compute_text_score


def compute_fusion_scores(
    df: pd.DataFrame,
    text_col: str = "notes",
    numeric_col: str = "numeric_score",
    alpha: float = 0.6,
) -> pd.DataFrame:
    """
    Combine a numeric score and a text-derived score into a final risk score.

    The fusion is a weighted average:

      risk_score = alpha * numeric_score + (1 - alpha) * text_score

    Both inputs are clipped to [0, 100] before they are combined, and
    ``alpha`` is clamped to [0, 1].

    Args:
        df: Input frame. It is not modified; a copy is returned.
        text_col: Column whose values are passed to ``compute_text_score``.
            If the column is absent, every row is scored on the empty
            string instead.
        numeric_col: Column holding the precomputed numeric score.
        alpha: Weight given to the numeric score; the text score gets
            ``1 - alpha``.

    Returns:
        A copy of ``df`` with ``numeric_col`` clipped to [0, 100] and three
        added columns:
          - text_score: result of ``compute_text_score``, clipped to [0, 100]
          - risk_score: the weighted average described above
          - risk_label: "HIGH" (>= 70), "MEDIUM" (>= 40) or "LOW"

    Raises:
        ValueError: If ``numeric_col`` is not a column of ``df``.
    """
    # Validate up front so a missing numeric column is reported before any
    # per-row text scoring is done.
    if numeric_col not in df.columns:
        raise ValueError(
            f"Column '{numeric_col}' not found in DataFrame. "
            "Make sure you computed numeric_score first."
        )

    df = df.copy()

    # 1) Compute text_score for each row.
    # Iterate the text column directly instead of df.iterrows(): iterrows
    # builds a full Series per row only to read a single cell.
    if text_col in df.columns:
        texts = df[text_col]
    else:
        texts = [""] * len(df)
    text_scores: List[float] = [compute_text_score(text) for text in texts]
    df["text_score"] = text_scores

    # 2) Fusion: numeric + text
    alpha = max(0.0, min(1.0, alpha))  # force into [0,1]

    # Keep both components within [0,100] so risk_score stays in that range.
    df[numeric_col] = df[numeric_col].clip(0, 100)
    df["text_score"] = df["text_score"].clip(0, 100)

    df["risk_score"] = alpha * df[numeric_col] + (1.0 - alpha) * df["text_score"]

    # 3) Label the risk level
    def label_risk(score: float) -> str:
        # NOTE: a NaN score fails both comparisons and is labelled "LOW".
        if score >= 70:
            return "HIGH"
        if score >= 40:
            return "MEDIUM"
        return "LOW"

    df["risk_label"] = df["risk_score"].apply(label_risk)

    return df
