In [1]:
from racesim.racesimulation import RaceSimulation
from racesim.config import *
import pandas as pd
import numpy as np
import time
import gymnasium as gym
from tqdm import tqdm

In [2]:
from sb3_contrib.qrdqn import QRDQN

In [3]:
from gymnasium_env.envs.f1_env import RewardFunctionPerPositionAtFinalLap
from gymnasium_env.envs.f1_env import RewardFunctionPositionChangeByLapsToGo
from gymnasium_env.envs.f1_env import RewardFunctionPositionChange
from gymnasium_env.envs.f1_env import RewardFunctionPerPositionAtFinalLapEval

In [4]:
# Evaluation settings shared by the cells below.
# Each evaluation loop runs one episode per seed in range(n_seeds).
n_seeds = 1_000
# Not referenced by the evaluation cells visible in this notebook section.
n_races_to_test = 1

In [6]:
# Load the run-1 "best" checkpoint and show its quantile network so the layer
# layout can be inspected in the cell output.
agent = QRDQN.load("run_1/best_model.zip")
# Bare last expression: the notebook renders the module's repr as the output.
agent.quantile_net

QuantileNetwork(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (quantile_net): Sequential(
    (0): Linear(in_features=220, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=84, bias=True)
  )
)

## QR-DQN

### R1 - Run 1 Best

In [5]:
# Evaluate the run-1 "best" checkpoint: one episode per seed, acting with
# deterministic predictions, recording the controlled driver's position right
# after reset (start) and once the episode has ended (finish).
qr_dqn_r1_start = []
qr_dqn_r1_finish = []
agent = QRDQN.load("run_1/best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A fresh environment per seed; the seed is passed to gym.make as a kwarg.
    env_basic = gym.make('F1Env/Basic-v0', seed=seed,
                         reward_function=RewardFunctionPerPositionAtFinalLapEval(),
                         grid_config=GridConfigMixed(), control_driver=True)
    try:
        obs, info = env_basic.reset()
        qr_dqn_r1_start.append(env_basic.our_driver.position)

        done = False
        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env_basic.step(action)
            done = terminated or truncated

        # Read the final position before the environment is closed.
        qr_dqn_r1_finish.append(env_basic.our_driver.position)
    finally:
        # Release the environment even if an episode raises; otherwise one
        # unclosed environment per seed accumulates over the whole sweep.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r1_finish).mean())
print(np.array(qr_dqn_r1_finish).std())

100%|███████████████████████████████████████| 1000/1000 [09:25<00:00,  1.77it/s]

12.283
6.533369039018078





### R1 - Run 1 Latest

In [6]:
# Evaluate the run-1 "latest" checkpoint: one episode per seed, acting with
# deterministic predictions, recording the controlled driver's position right
# after reset (start) and once the episode has ended (finish).
qr_dqn_r1_start = []
qr_dqn_r1_finish = []
agent = QRDQN.load("run_1/model_reward_position_change_0.zip")

for seed in tqdm(range(n_seeds)):
    # A fresh environment per seed; the seed is passed to gym.make as a kwarg.
    env_basic = gym.make('F1Env/Basic-v0', seed=seed,
                         reward_function=RewardFunctionPerPositionAtFinalLapEval(),
                         grid_config=GridConfigMixed(), control_driver=True)
    try:
        obs, info = env_basic.reset()
        qr_dqn_r1_start.append(env_basic.our_driver.position)

        done = False
        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env_basic.step(action)
            done = terminated or truncated

        # Read the final position before the environment is closed.
        qr_dqn_r1_finish.append(env_basic.our_driver.position)
    finally:
        # Release the environment even if an episode raises; otherwise one
        # unclosed environment per seed accumulates over the whole sweep.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r1_finish).mean())
print(np.array(qr_dqn_r1_finish).std())

100%|███████████████████████████████████████| 1000/1000 [09:32<00:00,  1.75it/s]

14.779
5.0900057956745





### R1 - Run 2 Best

In [7]:
# Evaluate the run-2 "best" checkpoint: one episode per seed, acting with
# deterministic predictions, recording the controlled driver's position right
# after reset (start) and once the episode has ended (finish).
qr_dqn_r1_start = []
qr_dqn_r1_finish = []
agent = QRDQN.load("run_2/best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A fresh environment per seed; the seed is passed to gym.make as a kwarg.
    env_basic = gym.make('F1Env/Basic-v0', seed=seed,
                         reward_function=RewardFunctionPerPositionAtFinalLapEval(),
                         grid_config=GridConfigMixed(), control_driver=True)
    try:
        obs, info = env_basic.reset()
        qr_dqn_r1_start.append(env_basic.our_driver.position)

        done = False
        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env_basic.step(action)
            done = terminated or truncated

        # Read the final position before the environment is closed.
        qr_dqn_r1_finish.append(env_basic.our_driver.position)
    finally:
        # Release the environment even if an episode raises; otherwise one
        # unclosed environment per seed accumulates over the whole sweep.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r1_finish).mean())
print(np.array(qr_dqn_r1_finish).std())

100%|███████████████████████████████████████| 1000/1000 [09:28<00:00,  1.76it/s]

14.511
5.5300885164706





### R1 - Run 2 Latest

In [9]:
# Evaluate the run-2 "latest" checkpoint: one episode per seed, acting with
# deterministic predictions, recording the controlled driver's position right
# after reset (start) and once the episode has ended (finish).
qr_dqn_r1_start = []
qr_dqn_r1_finish = []
agent = QRDQN.load("run_2/model_reward_position_change_1.zip")

for seed in tqdm(range(n_seeds)):
    # A fresh environment per seed; the seed is passed to gym.make as a kwarg.
    env_basic = gym.make('F1Env/Basic-v0', seed=seed,
                         reward_function=RewardFunctionPerPositionAtFinalLapEval(),
                         grid_config=GridConfigMixed(), control_driver=True)
    try:
        obs, info = env_basic.reset()
        qr_dqn_r1_start.append(env_basic.our_driver.position)

        done = False
        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env_basic.step(action)
            done = terminated or truncated

        # Read the final position before the environment is closed.
        qr_dqn_r1_finish.append(env_basic.our_driver.position)
    finally:
        # Release the environment even if an episode raises; otherwise one
        # unclosed environment per seed accumulates over the whole sweep.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r1_finish).mean())
print(np.array(qr_dqn_r1_finish).std())

100%|███████████████████████████████████████| 1000/1000 [09:37<00:00,  1.73it/s]

15.344
5.0045643167013045





### R1 - Run 3 Best

In [10]:
# Evaluate the run-3 "best" checkpoint: one episode per seed, acting with
# deterministic predictions, recording the controlled driver's position right
# after reset (start) and once the episode has ended (finish).
qr_dqn_r1_start = []
qr_dqn_r1_finish = []
agent = QRDQN.load("run_3/best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A fresh environment per seed; the seed is passed to gym.make as a kwarg.
    env_basic = gym.make('F1Env/Basic-v0', seed=seed,
                         reward_function=RewardFunctionPerPositionAtFinalLapEval(),
                         grid_config=GridConfigMixed(), control_driver=True)
    try:
        obs, info = env_basic.reset()
        qr_dqn_r1_start.append(env_basic.our_driver.position)

        done = False
        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env_basic.step(action)
            done = terminated or truncated

        # Read the final position before the environment is closed.
        qr_dqn_r1_finish.append(env_basic.our_driver.position)
    finally:
        # Release the environment even if an episode raises; otherwise one
        # unclosed environment per seed accumulates over the whole sweep.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r1_finish).mean())
print(np.array(qr_dqn_r1_finish).std())

100%|███████████████████████████████████████| 1000/1000 [09:39<00:00,  1.73it/s]

11.974
6.070529136739235





### R1 - Run 3 Latest

In [11]:
# Evaluate the run-3 "latest" checkpoint: one episode per seed, acting with
# deterministic predictions, recording the controlled driver's position right
# after reset (start) and once the episode has ended (finish).
qr_dqn_r1_start = []
qr_dqn_r1_finish = []
agent = QRDQN.load("run_3/model_reward_position_change_2.zip")

for seed in tqdm(range(n_seeds)):
    # A fresh environment per seed; the seed is passed to gym.make as a kwarg.
    env_basic = gym.make('F1Env/Basic-v0', seed=seed,
                         reward_function=RewardFunctionPerPositionAtFinalLapEval(),
                         grid_config=GridConfigMixed(), control_driver=True)
    try:
        obs, info = env_basic.reset()
        qr_dqn_r1_start.append(env_basic.our_driver.position)

        done = False
        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env_basic.step(action)
            done = terminated or truncated

        # Read the final position before the environment is closed.
        qr_dqn_r1_finish.append(env_basic.our_driver.position)
    finally:
        # Release the environment even if an episode raises; otherwise one
        # unclosed environment per seed accumulates over the whole sweep.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r1_finish).mean())
print(np.array(qr_dqn_r1_finish).std())

100%|███████████████████████████████████████| 1000/1000 [10:19<00:00,  1.61it/s]

10.459
6.313661299119554



