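# Racing

Train and evaluate a Stable-Baselines3 A2C agent (CNN policy) on the Gym `CarRacing-v0` environment, using frame-stacked vectorized environments.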

In [1]:
import os

import gym
from stable_baselines3 import A2C, PPO
from stable_baselines3.common.env_util import make_atari_env, make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack

## Env

In [2]:
# Gym environment id; reused below to create environments and to build log/model paths.
ENV_NAME = "CarRacing-v0"

### Testing

In [3]:
# Smoke test: confirm the environment id resolves before building the vectorized stack.
# NOTE(review): the AttributeError recorded under this cell suggests the Box2D extras were
# missing in the recording environment (presumably `pip install gym[box2d]`) — confirm.
env = gym.make(ENV_NAME)
# Release the probe environment right away; `env` is rebound to the vectorized stack below,
# which would otherwise leave this instance open with no remaining reference to close it.
env.close()

AttributeError: module 'gym.envs.box2d' has no attribute 'CarRacing'

### Vectorized Stack of Envs

In [79]:
# Build 4 seeded copies of the environment so the agent trains on all of them at once.
# CarRacing is a Box2D task, not an Atari/ALE game, so it is created with the generic
# make_vec_env helper: make_atari_env applies Atari-specific wrappers that look up ALE
# action meanings, which this environment does not provide.
env = make_vec_env(ENV_NAME, n_envs=4, seed=0)
# Stack the 4 most recent frames into each observation.
env = VecFrameStack(env, n_stack=4)

## Model

### Initializing

In [75]:
# Per-environment log directory: training/logs/<ENV_NAME> (passed to the model as tensorboard_log).
LOG_PATH = os.path.join(
    'training',
    'logs',
    ENV_NAME,
)

In [61]:
# Option A: start from a freshly initialised A2C agent with a CNN policy,
# logging training metrics under LOG_PATH.
model = A2C(
    policy='CnnPolicy',
    env=env,
    verbose=1,
    tensorboard_log=LOG_PATH,
)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [80]:
# Option B: resume from a previously saved checkpoint instead of the fresh agent.
MODEL_PATH = os.path.join('training', 'saved_models', ENV_NAME, 'v1')
# Stable-Baselines3 writes checkpoints as zip archives and appends '.zip' when the path has
# no extension, so look for that file. Guarding the load lets the notebook run top to bottom
# on a first run, before any checkpoint has been saved.
if os.path.exists(MODEL_PATH + '.zip'):
    model = A2C.load(MODEL_PATH, env=env)
else:
    print(f'No checkpoint found at {MODEL_PATH}.zip; skipping load.')

Wrapping the env in a VecTransposeImage.


### Training

In [76]:
# Train the agent for 10,000 environment steps in total.
model.learn(total_timesteps=10_000)

Logging to training\logs\breakout\A2C_3
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 480      |
|    ep_rew_mean        | 5.89     |
| time/                 |          |
|    fps                | 123      |
|    iterations         | 100      |
|    time_elapsed       | 16       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -0.79    |
|    explained_variance | 0.917    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5149     |
|    policy_loss        | 0.1      |
|    value_loss         | 0.109    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 474      |
|    ep_rew_mean        | 5.8      |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 200      |
|    time_elapsed       | 32       |
|    total_timesteps    | 4000     

<stable_baselines3.a2c.a2c.A2C at 0x1eaebb616a0>

In [77]:
# Persist the trained agent under training/saved_models/<ENV_NAME>/v1.
SAVE_PATH = os.path.join('training', 'saved_models', ENV_NAME, 'v1')
model.save(SAVE_PATH)
# `model` is deliberately kept in memory: the evaluation cell further down passes it to
# evaluate_policy, and deleting it here made that cell fail with NameError when the
# notebook is run from top to bottom.

### Evaluation

In [81]:
# Single seeded environment for evaluation, with the same 4-frame stacking used for training
# so the observations match what the policy was trained on.
# CarRacing is a Box2D task, not an Atari/ALE game, so it is created with the generic
# make_vec_env helper rather than make_atari_env (which applies Atari-specific wrappers).
env = make_vec_env(ENV_NAME, n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

In [84]:
# Roll the policy out for 10 rendered episodes; the cell displays the returned
# (mean reward, reward standard deviation) pair.
evaluate_policy(
    model=model,
    env=env,
    n_eval_episodes=10,
    render=True,
)

(5.1, 1.57797338380595)

In [85]:
# Shut down the evaluation environment (and any render window it opened).
env.close()