In [None]:
# agents/validator.py
import numpy as np
from sklearn.metrics import accuracy_score

class Validator:
    """
    Validator evaluates recommendations. Provides:
      - shaped reward combining env reward and alignment with insight
      - offline metrics calculation functions

    Attributes:
        alignment_coef: weight applied to the alignment score in
            ``shaped_reward``.
        token_penalty: cost subtracted per token used in ``shaped_reward``.
    """
    def __init__(self, alignment_coef=0.5, token_penalty=0.01):
        self.alignment_coef = alignment_coef
        self.token_penalty = token_penalty

    def compute_alignment(self, insight_text, action):
        """
        Very simple heuristic: if insight contains keywords matching action, +1; else 0.
        In production you replace with semantic similarity between action description and insight.

        Args:
            insight_text: free-text insight; ``None`` is treated as empty.
            action: a scalar, a list/tuple of items, or a NumPy array. Each
                item is stringified and whitespace-split into tokens.

        Returns:
            1.0 if any lower-cased action token occurs as a substring of the
            lower-cased insight, otherwise 0.0.
        """
        # A missing insight cannot align with anything; avoid calling
        # .lower() on None.
        if insight_text is None:
            return 0.0
        insight = str(insight_text).lower()

        # str() of an ndarray includes brackets ("[1 2]"), which would yield
        # tokens such as "[1" that never match; flatten to a plain list first.
        if isinstance(action, np.ndarray):
            action = action.ravel().tolist()

        if isinstance(action, (list, tuple)):
            action_str = " ".join(map(str, action)).lower()
        else:
            action_str = str(action).lower()

        # heuristic: substring containment, not whole-word matching
        return 1.0 if any(tok in insight for tok in action_str.split()) else 0.0

    def shaped_reward(self, env_reward, insight_text, action, tokens_used):
        """
        Combine the environment reward with an alignment bonus and a token cost.

        Returns:
            ``env_reward + alignment_coef * alignment - token_penalty * tokens_used``
            where ``alignment`` is the result of ``compute_alignment``.
        """
        align = self.compute_alignment(insight_text, action)
        shaped = env_reward + self.alignment_coef * align - self.token_penalty * tokens_used
        return shaped

    def offline_metrics(self, y_true, y_pred):
        """
        Compute offline evaluation metrics for predictions against labels.

        Returns:
            dict with a single key ``"accuracy"`` holding the result of
            ``accuracy_score(y_true, y_pred)`` converted to a Python float.
        """
        return {
            "accuracy": float(accuracy_score(y_true, y_pred))
        }
