In [1]:
from racesim.racesimulation import RaceSimulation
from racesim.config import *
import pandas as pd
import numpy as np
import time
import gymnasium as gym
from tqdm import tqdm

In [2]:
from sb3_contrib.qrdqn import QRDQN

In [3]:
from gymnasium_env.envs.f1_env import RewardFunctionPerPositionAtFinalLap
from gymnasium_env.envs.f1_env import RewardFunctionPositionChangeByLapsToGo
from gymnasium_env.envs.f1_env import RewardFunctionPositionChange
from gymnasium_env.envs.f1_env import RewardFunctionPerPositionAtFinalLapEval

In [4]:
# Evaluation settings shared by the cells below.
# Each evaluation loop iterates over range(n_seeds) and runs one episode per seed.
n_seeds = 1_000
# Not referenced by any of the evaluation cells visible in this notebook section.
n_races_to_test = 1

## QR-DQN

### R2 - Run 1 Best

In [5]:
# Evaluate the QR-DQN checkpoint stored at run_1/best_model.zip.
# For every seed a fresh environment is built, one episode is played with the
# deterministic policy, and the controlled driver's position is recorded right
# after reset (start) and after the episode ends (finish).
qr_dqn_r2_start = []
qr_dqn_r2_finish = []
agent = QRDQN.load("run_1/best_model.zip")

for seed in tqdm(range(n_seeds)):
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    try:
        obs, info = env_basic.reset()
        # gym.make() returns the env inside wrappers. Custom attributes such as
        # `our_driver` belong to the base env, so read them through `.unwrapped`
        # rather than depending on wrapper attribute forwarding.
        qr_dqn_r2_start.append(env_basic.unwrapped.our_driver.position)

        done = False
        # Per-episode tallies; not part of the printed summary, kept so they can
        # be inspected after the cell runs.
        total_rw = 0
        stops = 0  # counts steps where the chosen action is non-zero

        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            if action != 0:
                stops += 1
            obs, reward, terminated, truncated, info = env_basic.step(action)
            total_rw += reward
            done = terminated or truncated

        qr_dqn_r2_finish.append(env_basic.unwrapped.our_driver.position)
    finally:
        # One env is created per seed; close it so it is not leaked.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r2_finish).mean())
print(np.array(qr_dqn_r2_finish).std())

100%|███████████████████████████████████████| 1000/1000 [09:58<00:00,  1.67it/s]

16.048
3.449593599251947





### R2 - Run 1 Latest

In [6]:
# Evaluate the QR-DQN checkpoint stored at run_1/model_reward_position_change_0.zip.
# For every seed a fresh environment is built, one episode is played with the
# deterministic policy, and the controlled driver's position is recorded right
# after reset (start) and after the episode ends (finish).
qr_dqn_r2_start = []
qr_dqn_r2_finish = []
agent = QRDQN.load("run_1/model_reward_position_change_0.zip")

for seed in tqdm(range(n_seeds)):
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    try:
        obs, info = env_basic.reset()
        # gym.make() returns the env inside wrappers. Custom attributes such as
        # `our_driver` belong to the base env, so read them through `.unwrapped`
        # rather than depending on wrapper attribute forwarding.
        qr_dqn_r2_start.append(env_basic.unwrapped.our_driver.position)

        done = False
        # Per-episode tallies; not part of the printed summary, kept so they can
        # be inspected after the cell runs.
        total_rw = 0
        stops = 0  # counts steps where the chosen action is non-zero

        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            if action != 0:
                stops += 1
            obs, reward, terminated, truncated, info = env_basic.step(action)
            total_rw += reward
            done = terminated or truncated

        qr_dqn_r2_finish.append(env_basic.unwrapped.our_driver.position)
    finally:
        # One env is created per seed; close it so it is not leaked.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r2_finish).mean())
print(np.array(qr_dqn_r2_finish).std())

100%|███████████████████████████████████████| 1000/1000 [09:49<00:00,  1.70it/s]

16.029
3.1931424960374066





### R2 - Run 2 Best

In [7]:
# Evaluate the QR-DQN checkpoint stored at run_2/best_model.zip.
# For every seed a fresh environment is built, one episode is played with the
# deterministic policy, and the controlled driver's position is recorded right
# after reset (start) and after the episode ends (finish).
qr_dqn_r2_start = []
qr_dqn_r2_finish = []
agent = QRDQN.load("run_2/best_model.zip")

for seed in tqdm(range(n_seeds)):
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    try:
        obs, info = env_basic.reset()
        # gym.make() returns the env inside wrappers. Custom attributes such as
        # `our_driver` belong to the base env, so read them through `.unwrapped`
        # rather than depending on wrapper attribute forwarding.
        qr_dqn_r2_start.append(env_basic.unwrapped.our_driver.position)

        done = False
        # Per-episode tallies; not part of the printed summary, kept so they can
        # be inspected after the cell runs.
        total_rw = 0
        stops = 0  # counts steps where the chosen action is non-zero

        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            if action != 0:
                stops += 1
            obs, reward, terminated, truncated, info = env_basic.step(action)
            total_rw += reward
            done = terminated or truncated

        qr_dqn_r2_finish.append(env_basic.unwrapped.our_driver.position)
    finally:
        # One env is created per seed; close it so it is not leaked.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r2_finish).mean())
print(np.array(qr_dqn_r2_finish).std())

100%|███████████████████████████████████████| 1000/1000 [09:54<00:00,  1.68it/s]

14.173
3.3161831975932814





### R2 - Run 2 Latest

In [8]:
# Evaluate the QR-DQN checkpoint stored at run_2/model_reward_position_change_1.zip.
# For every seed a fresh environment is built, one episode is played with the
# deterministic policy, and the controlled driver's position is recorded right
# after reset (start) and after the episode ends (finish).
qr_dqn_r2_start = []
qr_dqn_r2_finish = []
agent = QRDQN.load("run_2/model_reward_position_change_1.zip")

for seed in tqdm(range(n_seeds)):
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    try:
        obs, info = env_basic.reset()
        # gym.make() returns the env inside wrappers. Custom attributes such as
        # `our_driver` belong to the base env, so read them through `.unwrapped`
        # rather than depending on wrapper attribute forwarding.
        qr_dqn_r2_start.append(env_basic.unwrapped.our_driver.position)

        done = False
        # Per-episode tallies; not part of the printed summary, kept so they can
        # be inspected after the cell runs.
        total_rw = 0
        stops = 0  # counts steps where the chosen action is non-zero

        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            if action != 0:
                stops += 1
            obs, reward, terminated, truncated, info = env_basic.step(action)
            total_rw += reward
            done = terminated or truncated

        qr_dqn_r2_finish.append(env_basic.unwrapped.our_driver.position)
    finally:
        # One env is created per seed; close it so it is not leaked.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r2_finish).mean())
print(np.array(qr_dqn_r2_finish).std())

100%|███████████████████████████████████████| 1000/1000 [09:55<00:00,  1.68it/s]

14.096
3.3437679345313422





### R2 - Run 3 Best

In [9]:
# Evaluate the QR-DQN checkpoint stored at run_3/best_model.zip.
# For every seed a fresh environment is built, one episode is played with the
# deterministic policy, and the controlled driver's position is recorded right
# after reset (start) and after the episode ends (finish).
qr_dqn_r2_start = []
qr_dqn_r2_finish = []
agent = QRDQN.load("run_3/best_model.zip")

for seed in tqdm(range(n_seeds)):
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    try:
        obs, info = env_basic.reset()
        # gym.make() returns the env inside wrappers. Custom attributes such as
        # `our_driver` belong to the base env, so read them through `.unwrapped`
        # rather than depending on wrapper attribute forwarding.
        qr_dqn_r2_start.append(env_basic.unwrapped.our_driver.position)

        done = False
        # Per-episode tallies; not part of the printed summary, kept so they can
        # be inspected after the cell runs.
        total_rw = 0
        stops = 0  # counts steps where the chosen action is non-zero

        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            if action != 0:
                stops += 1
            obs, reward, terminated, truncated, info = env_basic.step(action)
            total_rw += reward
            done = terminated or truncated

        qr_dqn_r2_finish.append(env_basic.unwrapped.our_driver.position)
    finally:
        # One env is created per seed; close it so it is not leaked.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r2_finish).mean())
print(np.array(qr_dqn_r2_finish).std())

100%|███████████████████████████████████████| 1000/1000 [10:09<00:00,  1.64it/s]

10.836
5.998091696531489





### R2 - Run 3 Latest

In [10]:
# Evaluate the QR-DQN checkpoint stored at run_3/model_reward_position_change_2.zip.
# For every seed a fresh environment is built, one episode is played with the
# deterministic policy, and the controlled driver's position is recorded right
# after reset (start) and after the episode ends (finish).
qr_dqn_r2_start = []
qr_dqn_r2_finish = []
agent = QRDQN.load("run_3/model_reward_position_change_2.zip")

for seed in tqdm(range(n_seeds)):
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    try:
        obs, info = env_basic.reset()
        # gym.make() returns the env inside wrappers. Custom attributes such as
        # `our_driver` belong to the base env, so read them through `.unwrapped`
        # rather than depending on wrapper attribute forwarding.
        qr_dqn_r2_start.append(env_basic.unwrapped.our_driver.position)

        done = False
        # Per-episode tallies; not part of the printed summary, kept so they can
        # be inspected after the cell runs.
        total_rw = 0
        stops = 0  # counts steps where the chosen action is non-zero

        while not done:
            action, _ = agent.predict(obs, deterministic=True)
            if action != 0:
                stops += 1
            obs, reward, terminated, truncated, info = env_basic.step(action)
            total_rw += reward
            done = terminated or truncated

        qr_dqn_r2_finish.append(env_basic.unwrapped.our_driver.position)
    finally:
        # One env is created per seed; close it so it is not leaked.
        env_basic.close()

# Mean and standard deviation of the finishing position over all seeds.
print(np.array(qr_dqn_r2_finish).mean())
print(np.array(qr_dqn_r2_finish).std())

100%|███████████████████████████████████████| 1000/1000 [10:24<00:00,  1.60it/s]

14.966
3.6609894837325063



