### Imports

In [1]:
!pip install git+https://github.com/sarah-keren/MAC


Collecting git+https://github.com/sarah-keren/MAC
  Cloning https://github.com/sarah-keren/MAC to /tmp/pip-req-build-m5egstmt
  Running command git clone -q https://github.com/sarah-keren/MAC /tmp/pip-req-build-m5egstmt
  Running command git submodule update --init --recursive -q
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: multi-agent-control
  Building wheel for multi-agent-control (PEP 517) ... [?25l[?25hdone
  Created wheel for multi-agent-control: filename=multi_agent_control-0.0.2-py3-none-any.whl size=5761 sha256=1dba2bf3cc7a84a8f1c417da2a6fd0e946a3cf57684b03179e12cd61c9dc47e7
  Stored in directory: /tmp/pip-ephem-wheel-cache-xok_8fe2/wheels/d2/b5/75/266763a6766cbb4afc7542444789afcd2edb49c523ea43d35f
Successfully built multi-agent-control
Installing collected packages: multi-agent-control
Successfully installed multi-agent

In [2]:
!pip install git+https://github.com/sarah-keren/multi_taxi


Collecting git+https://github.com/sarah-keren/multi_taxi
  Cloning https://github.com/sarah-keren/multi_taxi to /tmp/pip-req-build-g8soukcf
  Running command git clone -q https://github.com/sarah-keren/multi_taxi /tmp/pip-req-build-g8soukcf
Collecting gym==0.22.0
  Downloading gym-0.22.0.tar.gz (631 kB)
[K     |████████████████████████████████| 631 kB 8.6 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting ray==1.10.0
  Downloading ray-1.10.0-cp37-cp37m-manylinux2014_x86_64.whl (59.6 MB)
[K     |████████████████████████████████| 59.6 MB 1.3 MB/s 
[?25hCollecting ray[rllib]
  Downloading ray-1.12.0-cp37-cp37m-manylinux2014_x86_64.whl (53.2 MB)
[K     |████████████████████████████████| 53.2 MB 107 kB/s 
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.6-py3-none-any.whl (2.7 kB)
Collecting redis>=3.5.0
  Downloading redis-4.3.1-py3-none-any.w

In [3]:
import sys
import numpy as np
from control.controller_decentralized import DecentralizedController
from control.controller_centralized import CentralizedController
from agents.agent import Agent, RandomDecisionMaker
from environments.env_wrapper import EnvWrappper


### Environment Setup

In [4]:
from multi_taxi.taxi_environment import TaxiEnv

print('Initializing environment...')
env = TaxiEnv(num_taxis=3)

# Attach the multi-agent attributes our API reads. Every taxi name is mapped
# to the environment's own action/observation space.
env.agents = env.taxis_names
env.action_spaces = {name: env.action_space for name in env.agents}
env.observation_spaces = {name: env.observation_space for name in env.agents}
env.possible_agents = list(env.agents)  # separate list object, same names

# Forwarded as-is to the wrapper; its meaning is defined by EnvWrappper.
needs_conv = False

environment = EnvWrappper(env, needs_conv=needs_conv)






Initializing environment...


## Decentralized


In [5]:
# Build one Agent per environment agent, each wrapping its own
# RandomDecisionMaker over the environment's action space.
env_agents = environment.get_env_agents()
decentralized_agents = {
    name: Agent(RandomDecisionMaker(env.action_space))
    for name in env_agents
}

### Run Test

In [7]:
# Drive the wrapped environment with the per-agent decision makers,
# rendering as it goes, with max_iteration set to 10.
controller = DecentralizedController(
    environment,
    decentralized_agents,
)
controller.run(
    render=True,
    max_iteration=10,
)

+-----------------------+
| : |F: | : | : | : |F: |
| : | : : : | : | : | : |
| : : :[43m_[0m:[41m_[0m: : : : : : : |
| : : : : : | : : : :[33;1mP[0m: |
| : : : : : | : : : : : |
|[32m[33;1mD[0m[0m: : : : : : : : : : :[47m_[0m|
| | :G| | | :G| | | : | |
+-----------------------+
Taxi0-YELLOW: Fuel: inf, Location: (2,3), Collided: False
Taxi1-RED: Fuel: inf, Location: (2,4), Collided: False
Taxi2-WHITE: Fuel: inf, Location: (5,11), Collided: False
Passenger1: Location: (3, 10), Destination: (5, 0)
Done: False, {'taxi_0': False, 'taxi_1': False, 'taxi_2': False, '__all__': False}
Passengers Status's: [2]
+-----------------------+
| : |F: | : | : | : |F: |
| : | : : : | : | : | : |
| : : :[43m_[0m:[41m_[0m: : : : : : : |
| : : : : : | : : : :[33;1mP[0m: |
| : : : : : | : : : : :[47m_[0m|
|[32m[33;1mD[0m[0m: : : : : : : : : : : |
| | :G| | | :G| | | : | |
+-----------------------+
Taxi0-YELLOW: Fuel: inf, Location: (2,3), Collided: False
Taxi1-RED: Fuel: inf, Locati

## Centralized

#### Choose Central Agent

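In the `policy` variable, enter one of the following options:

- Policy Gradient: `'pg'`
- Deep Policy Gradient: `'dpg'`
- Deep Q-Network: `'dqn'`

Note: the next cell does not define a `policy` variable; it builds the central agent from a `RandomDecisionMaker`.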



In [8]:
# The centralized controller receives the agent names as keys only (every
# value is None) plus one central agent wrapping a RandomDecisionMaker.
env_agents = environment.get_env_agents()
env_agents_dict = dict.fromkeys(env_agents)
central_random_agent = Agent(RandomDecisionMaker(env.action_space))

### Run Test

In [9]:
# Drive the wrapped environment through the central agent, rendering as it
# goes, with max_iteration set to 10.
controller = CentralizedController(
    environment,
    env_agents_dict,
    central_random_agent,
)
controller.run(
    render=True,
    max_iteration=10,
)


+-----------------------+
| :[43m_[0m|F: | : | : | : |F: |
| : | : : : | :[32m[33;1mD[0m[0m| : | : |
| : : : :[33;1mP[0m: : : : : : : |
| : : : : : | : : :[47m_[0m: : |
| : : : : : | : : : : : |
| : : : : : : : : : : : |
| | :[41mG[0m| | | :G| | | : | |
+-----------------------+
Taxi0-YELLOW: Fuel: inf, Location: (0,1), Collided: False
Taxi1-RED: Fuel: inf, Location: (6,2), Collided: False
Taxi2-WHITE: Fuel: inf, Location: (3,9), Collided: False
Passenger1: Location: (2, 4), Destination: (1, 7)
Done: False, {'taxi_0': False, 'taxi_1': False, 'taxi_2': False, '__all__': False}
Passengers Status's: [2]
+-----------------------+
| : |F: | : | : | : |F: |
| :[43m_[0m| : : : | :[32m[33;1mD[0m[0m| : | : |
| : : : :[33;1mP[0m: : : : : : : |
| : : : : : | : : : :[47m_[0m: |
| : : : : : | : : : : : |
| : : : : : : : : : : : |
| | :[41mG[0m| | | :G| | | : | |
+-----------------------+
Taxi0-YELLOW: Fuel: inf, Location: (1,1), Collided: False
Taxi1-RED: Fuel: inf, Location