In [None]:
import heapq
import random

from maro.simulator import Env
from maro.simulator.scenarios.citi_bike.common import Action, DecisionEvent, DecisionType


class GreedyAgent:
    def __init__(self, supply_top_k: int = 1, demand_top_k: int = 1):
        """
        Agent that executes a greedy policy. If the event type is supply, send as many bikes as possible to one of the
        demand_top_k stations with the most empty slots. If the event type is demand, request as many bikes as
        possible from one of the supply_top_k stations with the most bikes.

        Args:
            supply_top_k (int): number of top supply candidates to choose from.
            demand_top_k (int): number of top demand candidates to choose from.
        """
        self._supply_top_k = supply_top_k
        self._demand_top_k = demand_top_k

    @staticmethod
    def _pick_candidate(action_scope: dict, own_idx: int, top_k: int):
        """
        Randomly pick one of the ``top_k`` largest entries of ``action_scope``, ignoring the deciding station.

        Args:
            action_scope (dict): mapping from station index to the amount available at that station.
            own_idx (int): index of the station the decision is made for; it is never a candidate.
            top_k (int): how many of the largest candidates to draw from.

        Returns:
            An ``(amount, station_idx)`` tuple, or ``None`` when no candidate is left (the scope holds no other
            station, or ``top_k`` is not positive).
        """
        # Tuples are ordered by amount first and station index second, so ties on the amount are resolved in favour
        # of the higher station index.
        candidates = heapq.nlargest(
            top_k,
            ((amount, idx) for idx, amount in action_scope.items() if idx != own_idx),
        )
        # random.choice raises IndexError on an empty sequence, so report "no candidate" explicitly instead.
        return random.choice(candidates) if candidates else None

    def choose_action(self, decision_event: "DecisionEvent"):
        """
        Build the greedy repositioning action for a decision event.

        Args:
            decision_event (DecisionEvent): event to answer; its ``type``, ``station_idx`` and ``action_scope`` are
                read.

        Returns:
            An ``Action`` moving as many bikes as the action scope allows for the chosen counterpart station, or
            ``None`` (i.e. take no action) when there is no counterpart station to choose from.
        """
        own_idx = decision_event.station_idx
        is_supply = decision_event.type == DecisionType.Supply

        # A supply event looks for demand candidates (stations with the most empty slots); any other event looks for
        # supply candidates (stations with the most bikes).
        top_k = self._demand_top_k if is_supply else self._supply_top_k
        picked = self._pick_candidate(decision_event.action_scope, own_idx, top_k)
        if picked is None:
            return None

        max_reposition, other_idx = picked
        if is_supply:
            # send bikes from the deciding station to the chosen target station
            return Action(own_idx, other_idx, max_reposition)

        # request bikes from the chosen source station for the deciding station
        return Action(other_idx, own_idx, max_reposition)

In [None]:
# Run one full episode of the citi_bike scenario with the greedy policy
env = Env(
    scenario="citi_bike",
    topology="ny.201801",
    start_tick=0,
    durations=2880,
    snapshot_resolution=10,
)
agent = GreedyAgent()

# The first step is taken without an action; it yields the first decision event to respond to.
metrics, decision_event, done = env.step(None)
while not done:
    action = agent.choose_action(decision_event)
    metrics, decision_event, done = env.step(action)

# Report the metrics held by the environment after the episode, then reset it.
print(f"Greedy agent policy performance: {env.metrics}")
env.reset()