## Testing of the Graph Environment

Imports

In [1]:
import numpy as np
import pandas as pd
import json
import os
import shutil
import sys
import gym

import ray
from ray.rllib.agents.ppo import PPOTrainer, DEFAULT_CONFIG

Import the Environment

In [2]:
sys.path.insert(0,"")

from StreetGraph import StreetGraph
from gym_graphenv.envs.Graphworld import GraphEnv

Run random steps without Ray Trainer

In [3]:
def run_one_episode (env):
    env.reset()
    sum_reward = 0
    for i in range(30):
        print(env.action_space)
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        sum_reward+=reward
        #env.render()
        if done:
            print("sum_reward: ",sum_reward, " time: ",env.time,  "deadline time: ", env.deadline,"pickup time: ", env.pickup_time )
            break

        print("sum_reward: ",sum_reward, " time: ",env.time, "deadline time: ", env.deadline, "pickup time: ", env.pickup_time)
    return sum_reward

env=GraphEnv()
for i in range(1):
    run_one_episode (env)

Discrete(2)
Available actions:  [[{'type': 'wait'}], [{'type': 'ownRide'}]]
Action space:  Discrete(2)
action ==  ownRide 
sum_reward:  20  time:  2022-01-01 23:54:19.600000 deadline time:  2022-01-02 02:45:00 pickup time:  2022-01-01 23:45:00


Initialize Ray

In [4]:
ray.init()

2022-04-04 23:03:43,136	INFO services.py:1374 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


{'node_ip_address': '127.0.0.1',
 'raylet_ip_address': '127.0.0.1',
 'redis_address': '127.0.0.1:6379',
 'object_store_address': '/tmp/ray/session_2022-04-04_23-03-41_277127_1726/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2022-04-04_23-03-41_277127_1726/sockets/raylet',
 'webui_url': '127.0.0.1:8265',
 'session_dir': '/tmp/ray/session_2022-04-04_23-03-41_277127_1726',
 'metrics_export_port': 53952,
 'gcs_address': '127.0.0.1:50394',
 'node_id': 'efa2e33a7c51cdac692ab13e217b725b699cfee80eee013adcab00e4'}

Set trainer configuration

In [5]:
trainer_config = DEFAULT_CONFIG.copy()
trainer_config['num_workers'] = 1
trainer_config["train_batch_size"] = 400
trainer_config["sgd_minibatch_size"] = 64
trainer_config["num_sgd_iter"] = 10
trainer_config["framework"] = "torch"

Initialize the trainer

In [6]:
trainer = PPOTrainer(trainer_config,GraphEnv )

2022-04-04 23:03:44,511	INFO ppo.py:249 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2022-04-04 23:03:44,512	INFO trainer.py:790 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Define the path where the results of the trainer should be saved

In [7]:
checkpoint_root = "tmp/ppo/graphworld"
shutil.rmtree(checkpoint_root, ignore_errors=True, onerror=None)   # clean up old runs

Run the trainer

In [8]:
results = []
episode_data = []
episode_json = []
n_iter=10

for n in range(n_iter):
    result = trainer.train()
    results.append(result)
    
    episode = {'n': n, 
               'episode_reward_min': result['episode_reward_min'], 
               'episode_reward_mean': result['episode_reward_mean'], 
               'episode_reward_max': result['episode_reward_max'],  
               'episode_len_mean': result['episode_len_mean']
              }
    
    episode_data.append(episode)
    episode_json.append(json.dumps(episode))
    file_name = trainer.save(checkpoint_root)
    
    print(f'{n+1:3d}: Min/Mean/Max reward: {result["episode_reward_min"]:8.4f}/{result["episode_reward_mean"]:8.4f}/{result["episode_reward_max"]:8.4f}, len mean: {result["episode_len_mean"]:8.4f}. Checkpoint saved to {file_name}')

[2m[36m(RolloutWorker pid=1796)[0m Available actions:  [[{'type': 'wait'}], [{'type': 'ownRide'}], {'departure_time': datetime.datetime(2022, 1, 1, 7, 20), 'target_hub': 306218723, 'route': [290333444, 304525639, 307663269, 307662889, 307662284, 309450366, 307662242, 306222128, 309450229, 306226719, 306222462, 306218723]}]
[2m[36m(RolloutWorker pid=1796)[0m Action space:  Discrete(3)
[2m[36m(RolloutWorker pid=1796)[0m action ==  ownRide 
[2m[36m(RolloutWorker pid=1796)[0m Available actions:  [[{'type': 'wait'}], [{'type': 'ownRide'}], {'departure_time': datetime.datetime(2022, 1, 1, 15, 55, 42), 'target_hub': 306221653, 'route': [290333444, 304525639, 307663269, 307662889, 307662284, 309450366, 307662242, 306222128, 306222385, 306221900, 306225115, 306225011, 306221653]}]
[2m[36m(RolloutWorker pid=1796)[0m Action space:  Discrete(3)
[2m[36m(RolloutWorker pid=1796)[0m action ==  ownRide 
[2m[36m(RolloutWorker pid=1796)[0m Available actions:  [[{'type': 'wait'}], [{'



[2m[36m(RolloutWorker pid=1796)[0m action ==  ownRide 
  1: Min/Mean/Max reward:   8.0000/ 19.2979/100.0000, len mean:   2.8369. Checkpoint saved to tmp/ppo/graphworld/checkpoint_000001/checkpoint-1
[2m[36m(RolloutWorker pid=1796)[0m Available actions:  [[{'type': 'wait'}], [{'type': 'ownRide'}]]
[2m[36m(RolloutWorker pid=1796)[0m Action space:  Discrete(2)
[2m[36m(RolloutWorker pid=1796)[0m invalid action, action to be taken is:  2  but the action space is:  Discrete(2)
[2m[36m(RolloutWorker pid=1796)[0m Available actions:  [[{'type': 'wait'}], [{'type': 'ownRide'}]]
[2m[36m(RolloutWorker pid=1796)[0m Action space:  Discrete(2)
[2m[36m(RolloutWorker pid=1796)[0m action == wait 
[2m[36m(RolloutWorker pid=1796)[0m Available actions:  [[{'type': 'wait'}], [{'type': 'ownRide'}]]
[2m[36m(RolloutWorker pid=1796)[0m Action space:  Discrete(2)
[2m[36m(RolloutWorker pid=1796)[0m action == wait 
[2m[36m(RolloutWorker pid=1796)[0m Available actions:  [[{'type': '

In [12]:
results

[{'episode_reward_max': 100.0,
  'episode_reward_min': 8.0,
  'episode_reward_mean': 19.29787234042553,
  'episode_len_mean': 2.8368794326241136,
  'episode_media': {},
  'episodes_this_iter': 141,
  'policy_reward_min': {},
  'policy_reward_max': {},
  'policy_reward_mean': {},
  'custom_metrics': {},
  'hist_stats': {'episode_reward': [20.0,
    20.0,
    17.0,
    20.0,
    20.0,
    16.0,
    14.0,
    19.0,
    20.0,
    18.0,
    11.0,
    20.0,
    20.0,
    14.0,
    16.0,
    20.0,
    20.0,
    18.0,
    16.0,
    18.0,
    20.0,
    20.0,
    20.0,
    18.0,
    20.0,
    97.0,
    19.0,
    18.0,
    16.0,
    20.0,
    20.0,
    20.0,
    18.0,
    19.0,
    20.0,
    18.0,
    18.0,
    20.0,
    20.0,
    20.0,
    18.0,
    18.0,
    20.0,
    11.0,
    20.0,
    18.0,
    19.0,
    20.0,
    20.0,
    19.0,
    19.0,
    13.0,
    18.0,
    19.0,
    19.0,
    18.0,
    18.0,
    19.0,
    13.0,
    18.0,
    17.0,
    19.0,
    20.0,
    19.0,
    20.0,
    20.0,
    

Testing of the trainer

In [9]:
trainer.restore(file_name)
env = gym.make("graphworld-v0")
state = env.reset()

2022-04-05 09:40:57,197	INFO trainable.py:472 -- Restored on 127.0.0.1 from checkpoint: tmp/ppo/graphworld/checkpoint_000010/checkpoint-10
2022-04-05 09:40:57,200	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': 4000, '_time_total': 38226.445654153824, '_episodes_total': 2911}


In [10]:
sum_reward = 0
n_step = 20
for step in range(n_step):
    action = trainer.compute_action(state)
    state, reward, done, info = env.step(action)
    sum_reward += reward
    #env.render()
    if done == 1:
        print("cumulative reward", sum_reward)
        state = env.reset()
        sum_reward = 0



Available actions:  [[{'type': 'wait'}], [{'type': 'ownRide'}]]
Action space:  Discrete(2)
action ==  ownRide 
cumulative reward 20
Available actions:  [[{'type': 'wait'}], [{'type': 'ownRide'}], {'departure_time': datetime.datetime(2022, 1, 1, 23, 29, 35), 'target_hub': 290333400, 'route': [290333444, 290333400]}, {'departure_time': datetime.datetime(2022, 1, 1, 23, 32), 'target_hub': 306218723, 'route': [290333444, 304525639, 307663269, 307662889, 307662284, 309450366, 307662242, 306222128, 309450229, 306226719, 306222462, 306218723]}]
Action space:  Discrete(4)
action ==  ownRide 
cumulative reward 20
Available actions:  [[{'type': 'wait'}], [{'type': 'ownRide'}], {'departure_time': datetime.datetime(2022, 1, 1, 15, 57, 13), 'target_hub': 306221653, 'route': [290333444, 304525639, 307663269, 307662889, 307662284, 309450366, 307662242, 306222128, 306222385, 306221900, 306225115, 306225011, 306221653]}, {'departure_time': datetime.datetime(2022, 1, 1, 15, 58, 23), 'target_hub': 290333

In [11]:
ray.shutdown()