# Quick Start

Below is a simple demo of interaction with the environment.

In [1]:
from maro.simulator import Env
from maro.simulator.scenarios.citi_bike.common import Action, DecisionEvent

# Build a one-day (1440-tick) citi_bike environment on the toy topology,
# taking a snapshot every 30 ticks.
env = Env(
    scenario="citi_bike",
    topology="toy.3s_4t",
    start_tick=0,
    durations=1440,
    snapshot_resolution=30,
)

# Values returned by every env.step() call.
metrics: object = None
decision_event: DecisionEvent = None
is_done: bool = False

# Responding with None means "do nothing" for every decision event.
action: Action = None

# Keep stepping until the environment reports that the simulation is over.
while True:
    metrics, decision_event, is_done = env.step(action)
    if is_done:
        break

print(metrics)

12:03:56 | INFO    | [32mGenerating trip data for topology toy.3s_4t .[0m
12:03:57 | INFO    | [32mBuilding binary data from ~/.maro/data/citi_bike/.source/.clean/toy.3s_4t/b37f36f2de334eb3/trips.csv to ~/.maro/data/citi_bike/.build/toy.3s_4t/b37f36f2de334eb3/trips.bin[0m
{'trip_requirements': 2156, 'bike_shortage': 1163, 'operation_number': 0}


# Environment of the bike repositioning

To initialize an environment, you need to specify the values of several parameters:
- **scenario**: The target scenario of this Env.
  - `citi_bike` denotes the bike repositioning scenario.
- **topology**: The target topology of this Env. As shown below, you can get the predefined topology list by calling `get_topologies(scenario='citi_bike')`
- **start_tick**: The start tick of this Env, 1 tick corresponds to 1 minute in citi_bike.
   - In the demo above, `start_tick=0` indicates a simulation start from the beginning of the given topology.
- **durations**: The duration of this Env, in the unit of tick/minute.
   - In the demo above, `durations=1440` indicates a simulation length of 1 day (24h * 60min/h).
- **snapshot_resolution**: The time granularity of maintaining the snapshots of the environments, in the unit of tick/minute.
   - In the demo above, `snapshot_resolution=30` indicates that a snapshot will be created and saved every 30 minutes during the simulation.

You can get all available scenarios and topologies by calling:

In [2]:
from maro.simulator.utils import get_scenarios, get_topologies
from pprint import pprint
from typing import List

# Query the simulator for everything it ships with.
scenarios: List[str] = get_scenarios()
topologies: List[str] = get_topologies(scenario='citi_bike')

# Each section is a heading followed by the corresponding list of names.
sections = (
    ('The available scenarios in MARO:', scenarios),
    ('The predefined topologies in Citi Bike:', topologies),
)

for position, (heading, names) in enumerate(sections):
    # Separate consecutive sections with a blank line.
    if position > 0:
        print()
    pprint(heading)
    pprint(names)

'The available scenarios in MARO:'
['cim', 'citi_bike']

'The predefined topologies in Citi Bike:'
['ny.201801',
 'ny.201802',
 'ny.201803',
 'ny.201804',
 'ny.201805',
 'ny.201806',
 'ny.201807',
 'ny.201808',
 'ny.201809',
 'ny.201810',
 'ny.201811',
 'ny.201812',
 'ny.201901',
 'ny.201902',
 'ny.201903',
 'ny.201904',
 'ny.201905',
 'ny.201906',
 'ny.201907',
 'ny.201908',
 'ny.201909',
 'ny.201910',
 'ny.201911',
 'ny.201912',
 'ny.202001',
 'ny.202002',
 'ny.202003',
 'ny.202004',
 'ny.202005',
 'ny.202006',
 'toy.3s_4t',
 'toy.4s_4t',
 'toy.5s_6t']


Once you have created an instance of the environment, you can easily access the real-time information of this environment, like:

In [3]:
from maro.backends.frame import SnapshotList
from maro.simulator import Env
from pprint import pprint
from typing import List


# Build a one-day citi_bike environment on the toy topology
env = Env(
    scenario="citi_bike",
    topology="toy.3s_4t",
    start_tick=0,
    durations=1440,
    snapshot_resolution=30,
)

# Tick the simulation is currently at
tick: int = env.tick
print(f"The current tick: {tick}.")

# Index of the current frame inside the snapshot list
frame_index: int = env.frame_index
print(f"The current frame index: {frame_index}.")

# Indices of all agents living in this environment
agent_idx_list: List[int] = env.agent_idx_list
agent_count = len(agent_idx_list)
print(f"There are {agent_count} agents in this Env.")

# The snapshot list of the environment; one snapshot is taken per snapshot_resolution ticks.
# How to query it is demonstrated later in this notebook.
snapshot_list: SnapshotList = env.snapshot_list
snapshot_count = len(snapshot_list)
print(f"There will be {snapshot_count} snapshots in total.")

# Summary information of the environment
summary: dict = env.summary
print()
print("Env Summary:")
pprint(summary)

# Metrics of the environment
metrics: dict = env.metrics
print()
print("Env Metrics:")
pprint(metrics)

12:04:06 | INFO    | [32mGenerating trip data for topology toy.3s_4t .[0m
12:04:07 | INFO    | [32mBuilding binary data from ~/.maro/data/citi_bike/.source/.clean/toy.3s_4t/0ad85988e84a43bd/trips.csv to ~/.maro/data/citi_bike/.build/toy.3s_4t/0ad85988e84a43bd/trips.bin[0m
The current tick: 0.
The current frame index: 0.
There are 3 agents in this Env.
There will be 48 snapshots in total.

Env Summary:
{'node_detail': {'matrices': {'attributes': {'trips_adj': {'slots': 9,
                                                           'type': 'i'}},
                              'number': 1},
                 'stations': {'attributes': {'bikes': {'slots': 1, 'type': 'i'},
                                             'capacity': {'slots': 1,
                                                          'type': 'i'},
                                             'extra_cost': {'slots': 1,
                                                            'type': 'i'},
                                 

# Interaction with the environment

Before starting interaction with the environment, we need to know **DecisionEvent** and **Action** first.

## DecisionEvent

Once the environment needs the agent's response to advance the simulation, it will throw a **DecisionEvent**. In the citi_bike scenario, the information carried by each `DecisionEvent` is listed below:
- **station_idx**: (int) The id of the station/agent that needs to respond to the environment;
- **tick**: (int) The corresponding tick;
- **frame_index**: (int) The corresponding frame index, that is the index of the corresponding snapshot in the snapshot list;
- **type**: (DecisionType) The decision type of this decision event. In citi_bike scenario, there are two types:
   - `Supply` indicates there are too many bikes in the corresponding station, so it is better to reposition some of them to other stations.
   - `Demand` indicates there are not enough bikes in the corresponding station, so it is better to reposition bikes from other stations.
- **action_scope**: (Dict) A dictionary that maintains the information for calculating the valid action scope:
   - The key of the item indicates the station/agent id;
   - The meaning of the value differs for different decision type:
      - If the decision type is `Supply`, the value of the station itself means its bike inventory at that moment, while the value of other target stations means the number of their empty docks;
      - If the decision type is `Demand`, the value of the station itself means the number of its empty docks, while the value of other target stations means their bike inventory.

## Action

Once we get a `DecisionEvent` from the environment, we should respond with an `Action`. A valid `Action` could be:
- `None`, which means do nothing.
- A valid `Action` instance, including:
   - **from_station_idx**: (int) The id of the source station of the bike transportation
   - **to_station_idx**: (int) The id of the destination station of the bike transportation
   - **number**: (int) The quantity of the bike transportation

## Generate random actions based on the DecisionEvent

The demo code in the Quick Start part has shown an interaction mode that does nothing (responding with a `None` action). Here we read the detailed information of the `DecisionEvent` and generate a random `Action` based on it.

In [4]:
from maro.simulator import Env
from maro.simulator.scenarios.citi_bike.common import Action, DecisionEvent, DecisionType

import random

# Initialize an Env for citi_bike scenario
env = Env(scenario="citi_bike", topology="toy.3s_4t", start_tick=0, durations=1440, snapshot_resolution=30)


def pick_counterpart(decision_event: DecisionEvent):
    """Randomly pick the other station involved in a repositioning.

    The candidates are every entry of ``decision_event.action_scope`` except the
    deciding station itself. A candidate is drawn with probability proportional
    to its value in the action scope (empty docks for a Supply event, bike
    inventory for a Demand event).

    Args:
        decision_event: The decision event received from the environment.

    Returns:
        A ``(station_idx, value)`` tuple for the chosen station, or ``None``
        when the action scope contains no other station.
    """
    candidates = [
        (idx, value)
        for idx, value in decision_event.action_scope.items()
        if idx != decision_event.station_idx
    ]
    if not candidates:
        return None

    weights = [value for _, value in candidates]
    # random.choices raises ValueError on Python 3.9+ when the weights sum to zero
    # (e.g. every other station is full/empty), so fall back to a uniform choice.
    if sum(weights) <= 0:
        weights = None
    return random.choices(candidates, weights=weights)[0]


metrics: object = None
decision_event: DecisionEvent = None
is_done: bool = False
action: Action = None

# Start the env with a None Action
metrics, decision_event, is_done = env.step(action)

while not is_done:
    # Default response: do nothing
    action = None

    if decision_event.type in (DecisionType.Supply, DecisionType.Demand):
        # Supply: the value of the station itself is its bike inventory
        # Demand: the value of the station itself is its quantity of empty docks
        self_value = decision_event.action_scope[decision_event.station_idx]

        # Supply: the value of other stations is their quantity of empty docks
        # Demand: the value of other stations is their bike inventory
        counterpart = pick_counterpart(decision_event)

        if counterpart is not None:
            target_idx, target_value = counterpart
            # The transferable quantity is bounded by both sides; never let the bound go negative
            number = random.randint(0, max(0, min(self_value, target_value)))

            if decision_event.type == DecisionType.Supply:
                # Move bikes away from the deciding station
                action = Action(
                    from_station_idx=decision_event.station_idx,
                    to_station_idx=target_idx,
                    number=number
                )
            else:
                # Bring bikes to the deciding station
                action = Action(
                    from_station_idx=target_idx,
                    to_station_idx=decision_event.station_idx,
                    number=number
                )

    # Randomly sample some records to show in the output
    if random.random() > 0.98:
        print(f"*************\n{decision_event}\n{action}")

    # Respond the environment with the generated Action
    metrics, decision_event, is_done = env.step(action)

12:04:16 | INFO    | [32mGenerating trip data for topology toy.3s_4t .[0m
12:04:17 | INFO    | [32mBuilding binary data from ~/.maro/data/citi_bike/.source/.clean/toy.3s_4t/56e2ca55ba6d4881/trips.csv to ~/.maro/data/citi_bike/.build/toy.3s_4t/56e2ca55ba6d4881/trips.bin[0m
*************
DecisionEvent(tick=79, station_idx=1, type=DecisionType.Demand, action_scope={0: 0, 2: 0, 1: 30})
Action(from_station_idx=2, to_station_idx=1, number=0)
*************
DecisionEvent(tick=799, station_idx=2, type=DecisionType.Demand, action_scope={0: 0, 1: 0, 2: 30})
Action(from_station_idx=1, to_station_idx=2, number=0)
*************
DecisionEvent(tick=959, station_idx=2, type=DecisionType.Demand, action_scope={0: 1, 1: 1, 2: 30})
Action(from_station_idx=0, to_station_idx=2, number=0)


## Get the environment observation

You can also implement other strategies or build models to take action. In that case, real-time information and historical records of the environment are very important for making good decisions, and the environment snapshot list is exactly what you need.

The information in the snapshot list is indexed by 3 dimensions:
- A frame index (list). (int / List[int]) Empty means all time slices up to now
- A station id (list). (int / List[int]) Empty means all stations/agents
- An Attribute name (list). (str / List[str]) You can get all available attributes in `env.summary` as shown before.

The return value from the snapshot list is a numpy.ndarray with shape **(num_frame * num_station * num_attribute, )**.

More detailed introduction to the snapshot list is [here](https://maro.readthedocs.io/en/latest/key_components/data_model.html#advanced-features).

In [5]:
from maro.simulator import Env
from pprint import pprint


# Build a one-day citi_bike environment on the toy topology
env = Env(
    scenario="citi_bike",
    topology="toy.3s_4t",
    start_tick=0,
    durations=1440,
    snapshot_resolution=30,
)

# The node details list every attribute that can be queried from snapshot_list
node_detail = env.summary['node_detail']

# Overview of all node types (nested attribute dictionaries are collapsed)
pprint(node_detail, depth=2)
print()

# Full attribute list of the stations
pprint(node_detail['stations'])

12:04:26 | INFO    | [32mGenerating trip data for topology toy.3s_4t .[0m
12:04:27 | INFO    | [32mBuilding binary data from ~/.maro/data/citi_bike/.source/.clean/toy.3s_4t/278c7458875b4474/trips.csv to ~/.maro/data/citi_bike/.build/toy.3s_4t/278c7458875b4474/trips.bin[0m
{'matrices': {'attributes': {...}, 'number': 1},
 'stations': {'attributes': {...}, 'number': 3}}

{'attributes': {'bikes': {'slots': 1, 'type': 'i'},
                'capacity': {'slots': 1, 'type': 'i'},
                'extra_cost': {'slots': 1, 'type': 'i'},
                'failed_return': {'slots': 1, 'type': 'i'},
                'fulfillment': {'slots': 1, 'type': 'i'},
                'holiday': {'slots': 1, 'type': 'i2'},
                'id': {'slots': 1, 'type': 'i'},
                'min_bikes': {'slots': 1, 'type': 'i'},
                'shortage': {'slots': 1, 'type': 'i'},
                'temperature': {'slots': 1, 'type': 'i2'},
                'transfer_cost': {'slots': 1, 'type': 'i'},
        

In [None]:
from maro.backends.frame import SnapshotList
from maro.simulator import Env
from pprint import pprint
from typing import List


# Initialize an Env for citi_bike scenario
snapshot_resolution = 30  # ticks (minutes) covered by one snapshot frame
env = Env(
    scenario="citi_bike",
    topology="toy.3s_4t",
    start_tick=0,
    durations=1440,
    snapshot_resolution=snapshot_resolution,
)

# Number of frames that cover the past 2 hours (120 minutes) at the chosen resolution
lookback_frames = 120 // snapshot_resolution

# Start the environment with None action
_, decision_event, is_done = env.step(None)

while not is_done:
    # Case of access snapshot after a certain number of frames
    if env.frame_index >= 24:
        # The frame list of past 2 hours
        past_2hour_frames = list(range(env.frame_index - lookback_frames, env.frame_index))
        decision_station_idx = decision_event.station_idx
        intr_station_infos = ["trip_requirement", "bikes", "shortage"]

        # Query the snapshot list of this environment to get the information of
        # the trip requirements, bikes, shortage of the decision station in the past 2 hours.
        # The query is indexed as [frame indices : station index : attribute names].
        past_2hour_info = env.snapshot_list["stations"][
            past_2hour_frames : decision_station_idx : intr_station_infos
        ]
        pprint(past_2hour_info)

        # This demo code is used to show how to access the information in snapshot,
        # so we terminate the env here for clear output
        break

    # Drive the environment with None action
    _, decision_event, is_done = env.step(None)

12:04:35 | INFO    | [32mGenerating trip data for topology toy.3s_4t .[0m
12:04:36 | INFO    | [32mBuilding binary data from ~/.maro/data/citi_bike/.source/.clean/toy.3s_4t/80aa59d9c58d4b5e/trips.csv to ~/.maro/data/citi_bike/.build/toy.3s_4t/80aa59d9c58d4b5e/trips.bin[0m
array([11.,  0., 11., 15.,  0., 15., 16.,  0., 16., 16.,  0., 16.],
      dtype=float32)
