# Run Agents using the Evaluator

This notebook runs an agent that is set up as a client of the evaluator, much like a submission.
The eval service runs in the notebook kernel. The env in the eval service is animated step by step using a canvas widget in the notebook.

## Todo
- invoke a submission docker rather than run the agent in the same VM as the kernel

## Testing status
This should not yet be run in the automated build tests in the CI.

## History
This is copied from the test-service notebook which runs a service in a separate process and a simple agent in the notebook process.

# Setup

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
import PIL
from flatland.utils.rendertools import RenderTool
import imageio
import os

In [4]:
from IPython.display import clear_output
from IPython.core import display
import ipywidgets as ipw
display.display(display.HTML("<style>.container { width:95% !important; }</style>"))

In [5]:
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.envs.malfunction_generators import malfunction_from_file, no_malfunction_generator
from flatland.envs.rail_generators import rail_from_file
from flatland.envs.schedule_generators import schedule_from_file
from flatland.core.env_observation_builder import DummyObservationBuilder
from flatland.envs.persistence import RailEnvPersister
from flatland.evaluators.client import FlatlandRemoteClient, TimeoutException
import flatland.evaluators.service as fes
from flatland.utils.rendertools import RenderTool
from flatland.utils import env_edit_utils as eeu
from flatland.utils import jupyter_utils as ju

In [6]:
import pickle
import redis
import subprocess as sp
import shlex
import time
import pkg_resources as pr
import importlib_resources as ir
import sys, os
import pandas as pd

In [7]:
!pwd

/home/jeremy/projects/aicrowd/rl-trains/flatland5/notebooks


### Find the real path of the `env_data` package (should be copied by tox)

In [8]:
# Resolve the on-disk location of a known test env inside the `env_data.tests` package;
# its directory is used in the next cell to locate the `service_test` folder.
with ir.path("env_data.tests", "test_001.pkl") as oPath:
    # NOTE(review): sPath is used after the `with` block exits. That is fine when the package
    # is installed as plain files; confirm it still holds if the resource ever has to be
    # extracted to a temporary file.
    sPath = oPath
print(type(sPath), sPath)

<class 'pathlib.PosixPath'> /home3/jeremy/projects/aicrowd/rl-trains/flatland5/env_data/tests/test_001.pkl


In [9]:
# Folder holding the evaluator's test envs: the `service_test` directory next to test_001.pkl.
# Built with pathlib instead of re-joining `sPath.parts`, so it no longer assumes that the
# first path component is "/". The trailing "/" of the original string form is kept.
sDirRoot = str(sPath.parent / "service_test") + "/"
# sDirRoot = "/home/jeremy/projects/aicrowd/rl-trains/neurips2020-flatland-scoring/environments/neurips2020_round_2_v6_final/debug/"
sDirRoot

'/home3/jeremy/projects/aicrowd/rl-trains/flatland5/env_data/tests/service_test/'

### Clear any old redis keys

In [10]:
# Connect to redis using the client's default connection settings.
oRedis = redis.Redis()

# Collect keys left behind by earlier runs (anything matching the "flatland*" pattern).
lKeys = oRedis.keys("flatland*")
print(lKeys)

# Delete them one at a time, logging each key as it is removed.
for sKey in lKeys:
    print("Deleting:", sKey)
    oRedis.delete(sKey)

[]


### Remove `/tmp/output.csv`

In [11]:
!rm -f /tmp/output.csv

### kill any old `service.py` process

In [12]:
#!ps -ef | grep -i python | grep -i flatland.evaluators.service | awk '{print $2}' | xargs kill

In [13]:
# Private copy of the kernel's environment: it is edited below and handed to the agent
# subprocess, so os.environ itself is left untouched.
osEnv2 = os.environ.copy()

### Timeouts copied from service.py

In [14]:
#MAX_SUCCESSIVE_TIMEOUTS = int(os.getenv("FLATLAND_MAX_SUCCESSIVE_TIMEOUTS", 10))

# 8 hours (will get debug timeout from env variable if applicable)
#OVERALL_TIMEOUT = int(os.getenv(
#    "FLATLAND_OVERALL_TIMEOUT",
#    8 * 60 * 60))

# 10 mins
#INTIAL_PLANNING_TIMEOUT = int(os.getenv(
#    "FLATLAND_INITIAL_PLANNING_TIMEOUT",
#    10 * 60))

# 10 seconds
#PER_STEP_TIMEOUT = int(os.getenv(
#    "FLATLAND_PER_STEP_TIMEOUT",
#    10))

# 5 min - applies to the rest of the commands
#DEFAULT_COMMAND_TIMEOUT = int(os.getenv(
#    "FLATLAND_DEFAULT_COMMAND_TIMEOUT",
#    5 * 60))

### Set some short timeouts for testing

In [15]:
# Apply all overrides to the copied environment in a single call.
osEnv2.update({
    "FLATLAND_OVERALL_TIMEOUT": "10",
    "FLATLAND_PER_STEP_TIMEOUT": "2",
    "FLATLAND_MAX_SUCCESSIVE_TIMEOUTS": "2",
    # pass the test folder to the client/agent via Env
    "AICROWD_TESTS_FOLDER": sDirRoot,
})

### Create the python command for `service.py` (printed for reference; in this notebook the service is started in the kernel instead)

In [16]:
#sCmd = "python -m flatland.evaluators.service --test_folder ../env_data/tests/service_test --mergeDir ./tmp/merge --actionDir ./tmp/actions --pickle --missingOnly"
#sCmd = "python -m flatland.evaluators.service --test_folder ../env_data/tests/service_test --pickle" # --verbose"
# Command line that would start the evaluator as a separate process.
# shlex.quote() keeps a test folder containing spaces or shell metacharacters as a single
# argument when the string is split below; for a plain path it changes nothing.
sCmd = f"python -m flatland.evaluators.service --test_folder {shlex.quote(sDirRoot)} --pickle"  # optionally add --verbose
lsCmd = shlex.split(sCmd)
print(sCmd)
print(lsCmd)

python -m flatland.evaluators.service --test_folder /home3/jeremy/projects/aicrowd/rl-trains/flatland5/env_data/tests/service_test/ --pickle
['python', '-m', 'flatland.evaluators.service', '--test_folder', '/home3/jeremy/projects/aicrowd/rl-trains/flatland5/env_data/tests/service_test/', '--pickle']


In [17]:
# 600x600 canvas widget for showing the env in the notebook.
oEC = ju.EnvCanvas(size=(600,600))
# Display object wrapping that canvas; it is handed to the service below.
oJD = ju.JuDisplay(envCanvas=oEC)
# Evaluation service created inside the kernel over the test folder, with timeouts disabled
# and the notebook display attached.
oSrv = fes.FlatlandRemoteEvaluationService(sDirRoot, disable_timeouts=True, display=oJD)

Timeout are DISABLED!
['Test_0/Level_0.pkl', 'Test_0/Level_1.pkl']
['Test_0/Level_0.pkl', 'Test_0/Level_1.pkl']


In [18]:
# Choose which agent to launch as the evaluator's client; flip the condition to switch.
# sCwd is the agent's project directory, relative to the notebook's working directory.
# shlex.quote() keeps a test folder containing spaces or shell metacharacters as a single
# argument after shlex.split(); for a plain path it changes nothing.
if True:
    # Mugurel's agent
    sCmd = f"python run.py --test_folder {shlex.quote(sDirRoot)}"  # optionally add --verbose
    sCwd = "../../mugurel"
else:
    # Jeremy's agent
    sCmd = f"python -m ijsolution.PathAgent --test_folder {shlex.quote(sDirRoot)}"  # optionally add --verbose
    sCwd = "../../jeremyw"

lsCmd = shlex.split(sCmd)
print(sCmd)
print(lsCmd)

python run.py --test_folder /home3/jeremy/projects/aicrowd/rl-trains/flatland5/env_data/tests/service_test/
['python', 'run.py', '--test_folder', '/home3/jeremy/projects/aicrowd/rl-trains/flatland5/env_data/tests/service_test/']


### Run the Agent command with Popen (output goes to jupyter stdout not notebook)

In [19]:
# Start the agent without waiting for it: argv from lsCmd, the modified environment osEnv2,
# and the agent's directory as the working directory.
oPipe = sp.Popen(lsCmd, env=osEnv2, cwd=sCwd)

In [20]:
oPipe.poll() # should return nothing / None while the agent is still running.

In [21]:
!ps -ef | grep -i run.py

jeremy     77349   77319  0 21:39 ?        00:00:00 python run.py --test_folder /home3/jeremy/projects/aicrowd/rl-trains/flatland5/env_data/tests/service_test/
jeremy     77357   77319  0 21:39 pts/5    00:00:00 /bin/bash -c ps -ef | grep -i run.py
jeremy     77359   77357  0 21:39 pts/5    00:00:00 grep -i run.py


In [22]:
# Serve the agent's requests from inside the kernel; this call blocks while the episodes run.
# NOTE(review): exit_on_submit=True presumably makes run() return once the agent submits — confirm in service.py.
oSrv.run(exit_on_submit=True)

Listening at :  flatland-rl::FLATLAND_RL_SERVICE_ID::commands
Starting overall timer...
Evaluating Test_0/Level_0.pkl (0/2)


Canvas(height=600, width=600)

Percentage for test 0, level 0: 1.0
[1.0]
Evaluation finished in 43 timesteps, 0.629 seconds. Percentage agents done: 1.000. Normalized reward: 0.934. Number of malfunctions: 0.
Total normalized reward so far: 0.934
Evaluating Test_0/Level_1.pkl (1/2)
Percentage for test 0, level 1: 1.0
[1.0, 1.0]
Evaluation finished in 43 timesteps, 0.532 seconds. Percentage agents done: 1.000. Normalized reward: 0.934. Number of malfunctions: 0.
Total normalized reward so far: 1.869
Overall Message Queue Latency :  0.22375703643966507
## Server Performance Stats
	 - message_queue_latency	 => min: 0.0002777576446533203 || mean: 0.22374874418908422 || max: 0.440108060836792
	 - current_episode_controller_inference_time	 => min: 0.000179290771484375 || mean: 0.000800360080807708 || max: 0.03564882278442383
	 - controller_inference_time	 => min: 0.000179290771484375 || mean: 0.000800360080807708 || max: 0.03564882278442383
	 - internal_env_step_time	 => min: 0.0004773139953613281 || mean: 0.0011824729830

In [23]:
# Check the agent process again: None means it is still running, otherwise this is its exit code.
oPipe.poll()

### Kill the evaluator process (only needed when the service was started as a separate process)

In [24]:
#!ps -ef | grep -i python | grep -i flatland.evaluators.service | awk '{print $2}' | xargs kill

In [25]:
#pd.read_csv("/tmp/output.csv").T