# Multi-agent Intersection with rl-agents' DQN

##  Warming up
We start with a few useful installs and imports:

##### Only for Google Colab

In [None]:
import sys

from google.colab import drive

# Mount Google Drive inside the Colab VM so the Drive-synced project folder is reachable.
drive.mount('/content/drive')

# Put the synced highway-env folder on the import path.
# NOTE(review): presumably this is what the later `highway_env` / `scripts.utils`
# imports resolve against — confirm.
sys.path.append('/content/drive/Othercomputers/My MacBook Pro/highway-env-tibi')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


##### Rest of code

In [None]:
!pip install stable-baselines3
!pip install pygame

# Environment
import gym
import highway_env

# Visualization utils
%load_ext tensorboard
import sys
from tqdm.notebook import trange
!pip install tensorboardx gym
!pip install pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg
# !git clone https://github.com/eleurent/highway-env.git 2> /dev/null
# sys.path.insert(0, '/content/highway-env/scripts/')
from scripts.utils import record_videos, show_videos

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorboardx
  Downloading tensorboardX-2.5.1-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 8.3 MB/s 
Installing collected packages: tensorboardx
Successfully installed tensorboardx-2.5.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl (15 kB)
Installing collected packages: pyvirtualdisplay
Successfully installed pyvirtualdisplay-3.0
Reading package lists... Done
Building dependency tree       
Reading state information... Done
ffmpeg is already the newest version (7:3.4.11-0ubuntu0.1).
The following package was automatic

## Training
Run tensorboard locally to visualize training.

In [None]:
%tensorboard --logdir "/content/drive/Othercomputers/My MacBook Pro/trained_agents/multi-agent"

In [20]:
import sys

# The rl-agents folder sits inside the Drive-synced highway-env folder; it must be on
# the import path before the `rl_agents` imports below can succeed.
sys.path.append('/content/drive/Othercomputers/My MacBook Pro/highway-env-tibi/rl-agents')
from rl_agents.trainer.evaluation import Evaluation
from rl_agents.agents.common.factory import load_agent, load_environment

# Environment and agent configuration files taken from the rl-agents repository.
path_config = '/content/drive/Othercomputers/My MacBook Pro/highway-env-tibi/rl-agents/'
env_config = f"{path_config}configs/IntersectionEnv/env_multi_agent_dest.json"
agent_config = f"{path_config}configs/IntersectionEnv/agents/DQNAgent/baseline.json"
# Known issue:
# "configs/IntersectionEnv/agents/DQNAgent/ego_attention_2h.json": does not work

# Build the environment first, since the agent is created against it.
env = load_environment(env_config)
print(agent_config)
agent = load_agent(agent_config, env)

# Location that receives the results of this training run.
sys.path.append('/content/drive/Othercomputers/My MacBook Pro/trained_agents/multi_agent')
run_directory = "/content/drive/Othercomputers/My MacBook Pro/trained_agents/multi_agent/baseline"

# Keyword options forwarded unchanged to Evaluation: a training run with a fixed
# simulation seed, no recovery of a previous model, and all displays switched off.
evaluation_settings = dict(
    run_directory=run_directory,
    num_episodes=50000,
    training=True,
    sim_seed=2,
    recover=False,
    display_env=False,
    display_agent=False,
    display_rewards=False,
)
evaluation = Evaluation(env, agent, **evaluation_settings)
print(f"Ready to train {agent} on {env}")

INFO: Making new env: intersection-multi-agent-v0
[INFO] Choosing GPU device: 0, memory used: 725 
  f"Overwriting existing videos at {self.video_folder} folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)"


/content/drive/Othercomputers/My MacBook Pro/highway-env-tibi/rl-agents/configs/IntersectionEnv/agents/DQNAgent/baseline.json
Ready to train <rl_agents.agents.deep_q_network.pytorch.DQNAgent object at 0x7f139a211750> on <OrderEnforcing<MultiAgentIntersectionEnv<intersection-multi-agent-v0>>>


Start training. This should take about an hour.

In [None]:
# Run training with the `evaluation` object built in the setup cell.
evaluation.train()

[INFO] Episode 0 score: 0.0 
[INFO] Saved DQNAgent model to /content/drive/Othercomputers/My MacBook Pro/highway-env-tibi/rl-agents/checkpoint-0.tar 
[INFO] Episode 1 score: 1.0 
[INFO] Saved DQNAgent model to /content/drive/Othercomputers/My MacBook Pro/highway-env-tibi/rl-agents/checkpoint-1.tar 
[INFO] Episode 2 score: 0.0 
[INFO] Episode 3 score: 4.0 
[INFO] Episode 4 score: 8.0 
[INFO] Episode 5 score: 0.0 
[INFO] Episode 6 score: 2.0 
[INFO] Episode 7 score: 1.0 
[INFO] Episode 8 score: 0.0 
[INFO] Saved DQNAgent model to /content/drive/Othercomputers/My MacBook Pro/highway-env-tibi/rl-agents/checkpoint-8.tar 
[INFO] Episode 9 score: 9.0 
[INFO] Episode 10 score: 0.0 
[INFO] Episode 11 score: 2.0 
[INFO] Episode 12 score: 0.0 
[INFO] Episode 13 score: 6.0 
[INFO] Episode 14 score: 3.0 
[INFO] Episode 15 score: 8.0 
[INFO] Episode 16 score: 0.0 
[INFO] Episode 17 score: 0.0 
[INFO] Episode 18 score: 8.0 
[INFO] Episode 19 score: 0.0 
[INFO] Episode 20 score: 1.0 
[INFO] Episode 21

## Testing

Run the learned policy for a few episodes.

In [None]:
# Load agent model
# Disabled alternative: load from the run directory that was passed to Evaluation for training.
# run_directory = "/content/drive/Othercomputers/My MacBook Pro/trained_agents/multi_agent/baseline"
# evaluation.load_agent_model(run_directory)
# Active call: load the "latest.tar" file from the rl-agents `out/.../saved_models` folder
# into the existing `evaluation` object.
# NOTE(review): the next cell rebinds `agent` and `evaluation` to new objects, so confirm
# whether this load is still needed or whether `recover=True` there covers it.
evaluation.load_agent_model("/content/drive/Othercomputers/My MacBook Pro/highway-env-tibi/rl-agents/out/MultiAgentIntersectionEnv/DQNAgent/saved_models/latest.tar")

In [None]:
# Rebuild the environment from the same config file and switch it to off-screen rendering.
env = load_environment(env_config)
env.configure(dict(offscreen_rendering=True))

# New agent and Evaluation for the test run: 3 episodes, with recover=True.
agent = load_agent(agent_config, env)
test_settings = dict(num_episodes=3, recover=True)
evaluation = Evaluation(env, agent, **test_settings)

# Run the test episodes, then display the videos found in the evaluation's run directory.
evaluation.test()
show_videos(evaluation.run_directory)

##### Download videos and model

In [None]:
# Not required just move the files from "/content/videos" to "drive/Othercomputers/My MacBook Pro/trained_agents"
!zip -r /content/videos.zip /content/videos
from google.colab import files
files.download("/content/videos.zip")


In [None]:
%tens