# Use human_agent

In [None]:
import os
import tempfile

import tensorflow as tf

from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.base_parameters import TaskParameters, VisualizationParameters
from rl_coach.core_types import EnvironmentSteps, TrainingSteps, SelectedPhaseOnlyDumpFilter, RunPhase
from rl_coach.schedules import LinearSchedule

from rl_coach import logger

from xebikart.gym.envs import rewards as gym_rewards
from xebikart.agent import XebikartHumanAgentParameters

In [None]:
# parameters
# step budget, used below for both the schedule's improve_steps and the
# spacing between evaluation periods
improve_steps = 400000
# max space between the car and the center of the road before ending an episode
max_cte_error = 5.0
# throttle value handed to the fixed-throttle environment
throttle = 0.20
# scale applied to the cross-track-error (cte) reward
scale_cte_reward = 5

In [None]:
# DONKEY_SIM_HOME: simulator location (presumably read by the xebikart gym
# environment -- confirm). A value already exported in the environment takes
# precedence; the developer-specific path below is only a fallback, so the
# notebook no longer clobbers an existing configuration.
os.environ.setdefault(
    "DONKEY_SIM_HOME",
    "/Users/nlaille/UnityProjects/xebikart-unity/outputs",
)
# Path of the saved VAE model, loaded further down with tf.keras.models.load_model
vae_path = "vae_model.h5"

In [None]:
def reward_cte(cte, scale):
    """Return the cross-track-error (cte) reward.

    The reward is a downward parabola in ``cte / max_cte_error``: it peaks at
    ``0.25 * scale`` where that ratio equals 0.5 and is 0 where the ratio is
    0 or 1. ``max_cte_error`` is read from module scope.

    :param cte: cross-track error value
    :param scale: multiplier applied to both terms of the reward
    :return: ``0.25 * scale - (cte / max_cte_error - 0.5) ** 2 * scale``
    """
    ceiling = 0.25 * scale
    normalized_offset = cte / max_cte_error - 0.5
    penalty = normalized_offset ** 2 * scale
    return ceiling - penalty

In [None]:
# custom reward
def build_reward_fn(cte_scale_reward_weight, crash_reward_weight):
    """Build a reward callback with the signature ``(reward, done, info)``.

    :param cte_scale_reward_weight: scale forwarded to ``reward_cte`` while
        the episode is still running
    :param crash_reward_weight: value returned as the reward once ``done``
        is truthy
    :return: the reward callback
    """
    def _custom_reward_fn(reward, done, info):
        """
        Custom reward function

        :param reward: incoming reward; not used by this function
        :param done: truthy when the episode has ended
        :param info: mapping of simulator values; only "cte" is read here.
            Keys listed by the original author:
            "x", "y", "z", "speed", "cte", "hit", "throttle", "steering"
        :return: ``crash_reward_weight`` when done, otherwise the cte reward
        """
        if not done:
            # still running: reward depends on the cross-track error only
            return reward_cte(info["cte"], cte_scale_reward_weight)
        # penalize the agent for getting off the road fast
        return crash_reward_weight
    return _custom_reward_fn

In [None]:
# define the environment parameters
# Load the VAE model stored at vae_path; it is handed to the simulator below
vae = tf.keras.models.load_model(vae_path)

env_params = GymVectorEnvironment(level='xebikart.gym.envs:create_fix_throttle_env')
env_params.human_control = True
env_params.additional_simulator_parameters = {
    'throttle': throttle,
    'vae': vae,
    'max_cte_error': max_cte_error,
    # reward callback: crash_reward_weight is what it returns once an episode is done
    'reward_fn': build_reward_fn(
        cte_scale_reward_weight=scale_cte_reward,
        crash_reward_weight=-20,
    ),
}

# Human agent
agent_params = XebikartHumanAgentParameters()

# schedule: zero heat-up steps and zero evaluation steps; improve_steps sets
# both the improve budget and the spacing between evaluation periods
schedule_params = SimpleSchedule()
schedule_params.heatup_steps = EnvironmentSteps(0)
schedule_params.evaluation_steps = EnvironmentSteps(0)
schedule_params.improve_steps = TrainingSteps(improve_steps)
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(improve_steps)

# tie agent, environment and schedule together
graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
)

In [None]:
# Create temp directory
# tempfile.mkdtemp() creates a new directory and returns its path; the
# directory is not removed automatically. It is used below as the
# experiment path of the task parameters.
experiment_tempdir = tempfile.mkdtemp()

In [None]:
# create graph
task_params = TaskParameters()
# request CPU-only execution
task_params.num_gpu = 0
task_params.use_cpu = True
# experiment path is the temp directory held in experiment_tempdir
task_params.experiment_path = experiment_tempdir

# build the graph from the task parameters, then start the improve phase
graph_manager.create_graph(task_params)
graph_manager.improve()

In [None]:
# Close the graph manager once the run above has finished
graph_manager.close()