# Racing

In [1]:
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import os

## Env

In [2]:
# Gym environment id; passed to gym.make and reused as the directory name
# for the log and saved-model paths built later in the notebook.
ENV_NAME = 'CarRacing-v0'

### Testing

In [8]:
# Smoke test: step the environment with uniformly sampled actions for a few
# episodes and print each episode's total reward.
env = gym.make(ENV_NAME)

episodes = 2
try:
    for ep in range(1, episodes + 1):
        obs = env.reset()
        done = False
        score = 0
        while not done:
            env.render()
            action = env.action_space.sample()  # random action, no policy involved
            obs, reward, done, info = env.step(action)
            score += reward

        print(f"Ep #{ep} - Score: {score}")
finally:
    # Close the environment even if the loop raises or the cell is interrupted.
    env.close()

Track generation: 1091..1368 -> 277-tiles track
Ep #1 - Score: -31.15942028985545
Track generation: 1248..1564 -> 316-tiles track
Ep #2 - Score: -39.682539682540295
Track generation: 1164..1459 -> 295-tiles track
Ep #3 - Score: -31.972789115646677
Track generation: 1100..1386 -> 286-tiles track
Ep #4 - Score: -26.315789473684536
Track generation: 1189..1490 -> 301-tiles track
Ep #5 - Score: -36.6666666666672


### Vectorized Stack of Envs

In [3]:
# Wrap a single environment in a DummyVecEnv. The environment is created
# inside the factory callable rather than closing over the `env` name that
# this very statement rebinds.
env = DummyVecEnv([lambda: gym.make(ENV_NAME)])

## Model

### Initializing

In [4]:
# Directory handed to PPO as `tensorboard_log`; one sub-folder per environment id.
LOG_PATH = os.path.join('training', 'logs', ENV_NAME)

In [5]:
# new: start from a fresh PPO agent with the 'CnnPolicy' policy on the
# vectorized env; tensorboard_log is pointed at LOG_PATH.
model = PPO('CnnPolicy', env, verbose=1, tensorboard_log=LOG_PATH)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [8]:
# load: alternative to the "new" cell above. Restores an agent from the same
# 'v1' path that the save cell writes to and attaches the current env.
# NOTE(review): presumably requires that checkpoint to exist from an earlier
# run; running this right after the "new" cell replaces the fresh model.
MODEL_PATH = os.path.join('training', 'saved_models', ENV_NAME, 'v1')
model = PPO.load(MODEL_PATH, env=env)

Wrapping the env in a VecTransposeImage.


### Training

In [6]:
# Train the agent; the step budget is named so it is easy to find and tune.
TOTAL_TIMESTEPS = 100_000
model.learn(total_timesteps=TOTAL_TIMESTEPS)

Track generation: 1056..1324 -> 268-tiles track
Logging to training/logs/CarRacing-v0/PPO_1
Track generation: 1240..1554 -> 314-tiles track
Track generation: 1187..1488 -> 301-tiles track
-----------------------------
| time/              |      |
|    fps             | 218  |
|    iterations      | 1    |
|    time_elapsed    | 9    |
|    total_timesteps | 2048 |
-----------------------------
Track generation: 1118..1408 -> 290-tiles track
Track generation: 1127..1420 -> 293-tiles track
-----------------------------------------
| time/                   |             |
|    fps                  | 117         |
|    iterations           | 2           |
|    time_elapsed         | 34          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008518344 |
|    clip_fraction        | 0.107       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.23       |
|    explained_variance   | -0.00205    |
| 

<stable_baselines3.ppo.ppo.PPO at 0x7f99042184c0>

In [7]:
# save model
SAVE_PATH = os.path.join('training', 'saved_models', ENV_NAME, 'v1')
model.save(SAVE_PATH)

# Round-trip through disk: drop the in-memory agent and restore it from the
# checkpoint just written. Without the reload, `model` would be undefined for
# the evaluation and test cells when the notebook is run top to bottom.
del model
model = PPO.load(SAVE_PATH, env=env)



### Evaluation

In [9]:
# Run the policy for 10 rendered episodes; evaluate_policy returns the mean
# episode reward and its standard deviation, shown as the cell's output.
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10, render=True)
(mean_reward, std_reward)



Track generation: 1215..1523 -> 308-tiles track
Track generation: 1140..1429 -> 289-tiles track
Track generation: 1062..1332 -> 270-tiles track
Track generation: 1157..1450 -> 293-tiles track
Track generation: 1037..1303 -> 266-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1080..1354 -> 274-tiles track
Track generation: 1139..1428 -> 289-tiles track
Track generation: 1174..1478 -> 304-tiles track
Track generation: 1136..1432 -> 296-tiles track
Track generation: 894..1129 -> 235-tiles track
Track generation: 1146..1437 -> 291-tiles track
Track generation: 1118..1411 -> 293-tiles track


(130.6724334895611, 112.93000714175278)

In [10]:
# NOTE(review): the "Test" cell below calls env.reset()/env.step() on this
# same env after it is closed here; confirm closing at this point is intended.
env.close()

### Test

In [12]:
# Watch the trained agent drive for a few episodes and print each total reward.
# `env` is the DummyVecEnv built earlier, so step() returns one-element arrays
# for reward and done; index [0] so the score and loop condition are scalars.
episodes = 2
try:
    for ep in range(1, episodes + 1):
        obs = env.reset()
        done = False
        score = 0.0
        while not done:
            env.render()
            action, _ = model.predict(obs)
            obs, rewards, dones, infos = env.step(action)
            score += float(rewards[0])
            done = bool(dones[0])

        print(f"Ep #{ep} - Score: {score}")
finally:
    # Close the environment even if the loop raises or the cell is interrupted.
    env.close()

Track generation: 1220..1529 -> 309-tiles track
Track generation: 1120..1404 -> 284-tiles track
Ep #1 - Score: [162.98268]
Track generation: 1160..1459 -> 299-tiles track
Track generation: 1088..1364 -> 276-tiles track
Ep #2 - Score: [17.450756]
