In [3]:
from typing import List, Dict 
from Agents import Prosumer


In [4]:
class Regulator:
    """
    Regulator that observes prosumer behavior and applies
    reward/punishment rules to achieve a system-level objective.

    Each prosumer with a positive surplus is classified by its participation
    ratio (energy traded peer-to-peer divided by its surplus) into one of
    four bands:

    * ratio < punish_threshold                        -> banned
    * punish_threshold <= ratio < reward_threshold1   -> reward_amount1
    * reward_threshold1 <= ratio < reward_threshold2  -> reward_amount2
    * ratio >= reward_threshold2                      -> reward_amount3

    Args:
        objective: Name of the system-level objective; ``"maximize_p2p"`` and
            ``"maximize_profit"`` are the values ``evaluate_objective``
            recognizes.
        punish_threshold: Participation ratio below which a prosumer is banned.
        reward_threshold1: Upper bound (exclusive) of the first reward band.
        reward_threshold2: Upper bound (exclusive) of the second reward band.
        reward_amount1: Money granted in the first reward band.
        reward_amount2: Money granted in the second reward band.
        reward_amount3: Money granted in the third (highest) reward band.
    """

    def __init__(
        self,
        objective: str = "maximize_p2p",
        punish_threshold: float = 0.1,
        reward_threshold1: float = 0.5,
        reward_threshold2: float = 0.8,
        reward_amount1: float = 0.1,
        reward_amount2: float = 0.3,
        reward_amount3: float = 0.5
    ):
        self.objective = objective
        self.punish_threshold = punish_threshold
        self.reward_threshold1 = reward_threshold1
        self.reward_threshold2 = reward_threshold2
        self.reward_amount1 = reward_amount1
        self.reward_amount2 = reward_amount2
        self.reward_amount3 = reward_amount3

    # --------------------------------------------------
    # System-level objective evaluation
    # --------------------------------------------------
    def evaluate_objective(self, stats_t: Dict) -> float:
        """
        Evaluate the regulator objective based on community statistics.

        Args:
            stats_t: Statistics mapping for one time step. The key read
                depends on the objective: ``"p2p_share"`` for
                ``"maximize_p2p"``, ``"community_profit"`` for
                ``"maximize_profit"``.

        Returns:
            The value stored under the relevant key, or 0.0 when the key is
            missing or the objective is not one of the recognized names.
        """

        if self.objective == "maximize_p2p":
            return stats_t.get("p2p_share", 0.0)

        # Profit-maximizing objective: score by the community profit.
        if self.objective == "maximize_profit":
            return stats_t.get("community_profit", 0.0)

        return 0.0

    # --------------------------------------------------
    # Apply regulation rules
    # --------------------------------------------------
    def apply_rules(self, prosumers: List["Prosumer"]) -> None:
        """
        Apply reward and punishment rules to each prosumer
        based on behavior in the previous time step.

        For every prosumer this clears ``banned``, then (if it had a positive
        ``surplus_today``) updates ``banned``, ``money`` and
        ``trade_fraction`` according to its participation ratio, and finally
        calls ``reset_step_metrics()``.

        Args:
            prosumers: Prosumers to regulate; each is mutated in place.
        """

        for p in prosumers:

            # Ban lasts only one step
            p.banned = False

            # No surplus => nothing to evaluate
            if p.surplus_today <= 0:
                p.reset_step_metrics()
                continue

            # Participation ratio (small epsilon keeps the division safe)
            participation_ratio = (
                p.p2p_traded_today / (p.surplus_today + 1e-6)
            )

            # Punishment
            if participation_ratio < self.punish_threshold:
                p.banned = True
                # Agent "learns" that it must be more cooperative in future
                # to avoid being banned
                p.trade_fraction = min(1.0, p.trade_fraction + 0.1)

            # Reward, first band. The lower bound is implied by the branch
            # above having failed.
            elif participation_ratio < self.reward_threshold1:
                # Fixed: the original read the non-existent attribute
                # ``self.reward_amount`` and raised AttributeError here.
                p.money += self.reward_amount1
                # With the reward, the prosumer becomes more cooperative and
                # trades more. The behavior boost shrinks in the higher bands
                # even though the reward grows (diminishing sensitivity).
                p.trade_fraction = min(1.0, p.trade_fraction + 0.05)

            # Reward, second band
            elif participation_ratio < self.reward_threshold2:
                p.money += self.reward_amount2
                p.trade_fraction = min(1.0, p.trade_fraction + 0.03)

            # Reward, top band. Kept as an explicit comparison (not a bare
            # ``else``) so a NaN ratio matches no branch.
            elif participation_ratio >= self.reward_threshold2:
                p.money += self.reward_amount3
                p.trade_fraction = min(1.0, p.trade_fraction + 0.01)

            # Reset metrics for next step
            p.reset_step_metrics()