In [1]:
from racesim.racesimulation import RaceSimulation
from racesim.config import *
import pandas as pd
import numpy as np
import time
import gymnasium as gym
from tqdm import tqdm

In [2]:
from stable_baselines3.dqn import DQN
from stable_baselines3.a2c import A2C
from sb3_contrib.qrdqn import QRDQN

In [3]:
from gymnasium_env.envs.f1_env import RewardFunctionPerPositionAtFinalLap
from gymnasium_env.envs.f1_env import RewardFunctionPositionChangeByLapsToGo
from gymnasium_env.envs.f1_env import RewardFunctionPositionChange
from gymnasium_env.envs.f1_env import RewardFunctionPerPositionAtFinalLapEval

In [4]:
# Evaluation settings read by the cells below.
# Every evaluation loop iterates over range(n_seeds), building one environment per seed.
n_seeds = 1_000
# Not read by any of the evaluation loops in this part of the notebook.
n_races_to_test = 1

## RANDOM

### RANDOMNESS = 0

In [5]:
# Baseline with randomness=0: the environment is created with
# control_driver=False and is stepped with the constant action 0.
# (The agent cells count every non-zero action as a "stop", so action 0 is
# presumably "do not pit" -- confirm against the environment definition.)
start_positions_base, finish_positions_base = [], []

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=False,
        randomness=0,
    )
    obs, info = env_basic.reset()
    # Position of our driver right after reset, i.e. before any step is taken.
    start_positions_base.append(env_basic.our_driver.position)

    total_rw = 0
    done = False
    while not done:
        next_obs, reward, terminated, truncated, info = env_basic.step(0)
        obs = next_obs
        total_rw += reward
        done = terminated or truncated

    # Position of our driver once the episode has terminated or been truncated.
    finish_positions_base.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.mean(finish_positions_base))
print(np.std(finish_positions_base))

100%|███████████████████████████████████████| 1000/1000 [17:57<00:00,  1.08s/it]

10.488
5.712079831374909





### RANDOMNESS = 1

In [6]:
# Baseline with randomness=1: same procedure as the randomness=0 baseline
# (control_driver=False, constant action 0), only the `randomness` argument differs.
start_positions_base_1, finish_positions_base_1 = [], []

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=False,
        randomness=1,
    )
    obs, info = env_basic.reset()
    # Position of our driver right after reset.
    start_positions_base_1.append(env_basic.our_driver.position)

    total_rw = 0
    done = False
    while not done:
        next_obs, reward, terminated, truncated, info = env_basic.step(0)
        obs = next_obs
        total_rw += reward
        done = terminated or truncated

    # Position of our driver at the end of the episode.
    finish_positions_base_1.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.mean(finish_positions_base_1))
print(np.std(finish_positions_base_1))

100%|███████████████████████████████████████| 1000/1000 [18:56<00:00,  1.14s/it]

13.559
4.61459846573892





### RANDOMNESS = 2

In [7]:
# Baseline with randomness=2: same procedure as the other baselines
# (control_driver=False, constant action 0), only the `randomness` argument differs.
start_positions_base_2, finish_positions_base_2 = [], []

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=False,
        randomness=2,
    )
    obs, info = env_basic.reset()
    # Position of our driver right after reset.
    start_positions_base_2.append(env_basic.our_driver.position)

    total_rw = 0
    done = False
    while not done:
        next_obs, reward, terminated, truncated, info = env_basic.step(0)
        obs = next_obs
        total_rw += reward
        done = terminated or truncated

    # Position of our driver at the end of the episode.
    finish_positions_base_2.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.mean(finish_positions_base_2))
print(np.std(finish_positions_base_2))

100%|███████████████████████████████████████| 1000/1000 [17:32<00:00,  1.05s/it]

16.219
4.641663387192138





## DQN

### R1

In [8]:
# Evaluate the saved DQN model "dqn_r1_best_model.zip": one episode per seed,
# recording our driver's position after reset and at the end of the episode.
# NOTE(review): unlike the baseline cells, `randomness` is not passed to
# gym.make here, so the environment's own default applies -- confirm that this
# is the setting the comparison is meant to use.
dqn_r1_start, dqn_r1_finish = [], []
agent = DQN.load("dqn_r1_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    dqn_r1_start.append(env_basic.our_driver.position)

    stops = 0
    total_rw = 0
    done = False
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)
        obs = next_obs
        total_rw += reward
        done = terminated or truncated

    dqn_r1_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.mean(dqn_r1_finish))
print(np.std(dqn_r1_finish))
    

100%|███████████████████████████████████████| 1000/1000 [19:08<00:00,  1.15s/it]

11.267
5.814267881685534





### R2

In [9]:
# Evaluate the saved DQN model "dqn_r2_best_model.zip": one episode per seed,
# recording our driver's position after reset and at the end of the episode.
# NOTE(review): `randomness` is not passed to gym.make here (the baseline cells
# pass it explicitly), so the environment's default applies -- confirm.
dqn_r2_start, dqn_r2_finish = [], []
agent = DQN.load("dqn_r2_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    dqn_r2_start.append(env_basic.our_driver.position)

    stops = 0
    total_rw = 0
    done = False
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)
        obs = next_obs
        total_rw += reward
        done = terminated or truncated

    dqn_r2_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.mean(dqn_r2_finish))
print(np.std(dqn_r2_finish))
    

100%|███████████████████████████████████████| 1000/1000 [15:04<00:00,  1.11it/s]

11.904
4.4028154628601





### R3 - Trained with Fixed Grid

In [10]:
# Evaluate the saved DQN model "dqn_r3_fixed_grid_best_model.zip" (R3, trained
# with a fixed grid): one episode per seed, recording our driver's position
# after reset and at the end of the episode.
#
# The results live in lists named after this specific model. The
# "R3 - Trained with Random Grid" DQN cell rebinds dqn_r3_start / dqn_r3_finish,
# so storing the fixed-grid results only under those names would lose them as
# soon as that cell runs. The old names are still bound to the same lists here
# so that anything reading them right after this cell keeps working.
dqn_r3_fixed_start = dqn_r3_start = []
dqn_r3_fixed_finish = dqn_r3_finish = []
agent = DQN.load("dqn_r3_fixed_grid_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    # NOTE(review): `randomness` is not passed here (the baseline cells pass it
    # explicitly), so the environment's default applies -- confirm.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    dqn_r3_fixed_start.append(env_basic.our_driver.position)

    done = False
    total_rw = 0
    stops = 0
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)

        total_rw += reward
        done = terminated or truncated
        obs = next_obs

    dqn_r3_fixed_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.array(dqn_r3_fixed_finish).mean())
print(np.array(dqn_r3_fixed_finish).std())
    

100%|███████████████████████████████████████| 1000/1000 [19:15<00:00,  1.16s/it]

11.645
5.611682011661031





### R3 - Trained with Random Grid

In [11]:
# Evaluate the DQN model for "R3 - Trained with Random Grid": one episode per
# seed, recording our driver's position after reset and at the end of the episode.
#
# The results are collected in lists named after this specific model.
# dqn_r3_start / dqn_r3_finish are also the names used by the fixed-grid DQN
# cell, so on their own they do not say which model the numbers belong to.
# The old names are still bound to the same lists for backward compatibility.
dqn_r3_random_start = dqn_r3_start = []
dqn_r3_random_finish = dqn_r3_finish = []
# NOTE(review): this file name does not follow the
# "<algo>_r3_random_grid_best_model.zip" pattern used by the QR-DQN and A2C
# random-grid cells -- confirm it is the intended DQN R3 random-grid checkpoint.
agent = DQN.load("model_reward_position_change_1.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    # NOTE(review): `randomness` is not passed here (the baseline cells pass it
    # explicitly), so the environment's default applies -- confirm.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    dqn_r3_random_start.append(env_basic.our_driver.position)

    done = False
    total_rw = 0
    stops = 0
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)

        total_rw += reward
        done = terminated or truncated
        obs = next_obs

    dqn_r3_random_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.array(dqn_r3_random_finish).mean())
print(np.array(dqn_r3_random_finish).std())
    

100%|███████████████████████████████████████| 1000/1000 [13:34<00:00,  1.23it/s]

11.267
4.872751891898457





## QR-DQN

### R1

In [12]:
# Evaluate the saved QR-DQN model "qr-dqn_r1_best_model.zip": one episode per
# seed, recording our driver's position after reset and at the end of the episode.
# NOTE(review): `randomness` is not passed to gym.make here (the baseline cells
# pass it explicitly), so the environment's default applies -- confirm.
qr_dqn_r1_start, qr_dqn_r1_finish = [], []
agent = QRDQN.load("qr-dqn_r1_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    qr_dqn_r1_start.append(env_basic.our_driver.position)

    stops = 0
    total_rw = 0
    done = False
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)
        obs = next_obs
        total_rw += reward
        done = terminated or truncated

    qr_dqn_r1_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.mean(qr_dqn_r1_finish))
print(np.std(qr_dqn_r1_finish))

100%|███████████████████████████████████████| 1000/1000 [20:06<00:00,  1.21s/it]

10.459
6.313661299119554





### R2

In [13]:
# Evaluate the saved QR-DQN model "qr-dqn_r2_best_model.zip": one episode per
# seed, recording our driver's position after reset and at the end of the episode.
# NOTE(review): `randomness` is not passed to gym.make here (the baseline cells
# pass it explicitly), so the environment's default applies -- confirm.
qr_dqn_r2_start, qr_dqn_r2_finish = [], []
agent = QRDQN.load("qr-dqn_r2_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    qr_dqn_r2_start.append(env_basic.our_driver.position)

    stops = 0
    total_rw = 0
    done = False
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)
        obs = next_obs
        total_rw += reward
        done = terminated or truncated

    qr_dqn_r2_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.mean(qr_dqn_r2_finish))
print(np.std(qr_dqn_r2_finish))

100%|███████████████████████████████████████| 1000/1000 [20:02<00:00,  1.20s/it]

10.836
5.998091696531489





### R3 - Trained with Fixed Grid

In [14]:
# Evaluate the saved QR-DQN model "qr-dqn_r3_fixed_grid_best_model.zip" (R3,
# trained with a fixed grid): one episode per seed, recording our driver's
# position after reset and at the end of the episode.
#
# The results live in lists named after this specific model. The
# "R3 - Trained with Random Grid" QR-DQN cell rebinds qr_dqn_r3_start /
# qr_dqn_r3_finish, so storing the fixed-grid results only under those names
# would lose them as soon as that cell runs. The old names are still bound to
# the same lists here so that anything reading them right after this cell
# keeps working.
qr_dqn_r3_fixed_start = qr_dqn_r3_start = []
qr_dqn_r3_fixed_finish = qr_dqn_r3_finish = []
agent = QRDQN.load("qr-dqn_r3_fixed_grid_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    # NOTE(review): `randomness` is not passed here (the baseline cells pass it
    # explicitly), so the environment's default applies -- confirm.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    qr_dqn_r3_fixed_start.append(env_basic.our_driver.position)

    done = False
    total_rw = 0
    stops = 0
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)

        total_rw += reward
        done = terminated or truncated
        obs = next_obs

    qr_dqn_r3_fixed_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.array(qr_dqn_r3_fixed_finish).mean())
print(np.array(qr_dqn_r3_fixed_finish).std())

100%|███████████████████████████████████████| 1000/1000 [20:08<00:00,  1.21s/it]

11.939
5.322337738249988





### R3 - Trained with Random Grid

In [15]:
# Evaluate the saved QR-DQN model "qr-dqn_r3_random_grid_best_model.zip" (R3,
# trained with a random grid): one episode per seed, recording our driver's
# position after reset and at the end of the episode.
#
# The results are collected in lists named after this specific model.
# qr_dqn_r3_start / qr_dqn_r3_finish are also the names used by the fixed-grid
# QR-DQN cell, so on their own they do not say which model the numbers belong
# to. The old names are still bound to the same lists for backward compatibility.
qr_dqn_r3_random_start = qr_dqn_r3_start = []
qr_dqn_r3_random_finish = qr_dqn_r3_finish = []
agent = QRDQN.load("qr-dqn_r3_random_grid_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    # NOTE(review): `randomness` is not passed here (the baseline cells pass it
    # explicitly), so the environment's default applies -- confirm.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    qr_dqn_r3_random_start.append(env_basic.our_driver.position)

    done = False
    total_rw = 0
    stops = 0
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)

        total_rw += reward
        done = terminated or truncated
        obs = next_obs

    qr_dqn_r3_random_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.array(qr_dqn_r3_random_finish).mean())
print(np.array(qr_dqn_r3_random_finish).std())

100%|███████████████████████████████████████| 1000/1000 [20:04<00:00,  1.20s/it]

13.091
6.094154494267436





## A2C

### R1

In [16]:
# Evaluate the saved A2C model "a2c_r1_best_model.zip": one episode per seed,
# recording our driver's position after reset and at the end of the episode.
# NOTE(review): `randomness` is not passed to gym.make here (the baseline cells
# pass it explicitly), so the environment's default applies -- confirm.
a2c_r1_start, a2c_r1_finish = [], []
agent = A2C.load("a2c_r1_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    a2c_r1_start.append(env_basic.our_driver.position)

    stops = 0
    total_rw = 0
    done = False
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)
        obs = next_obs
        total_rw += reward
        done = terminated or truncated

    a2c_r1_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.mean(a2c_r1_finish))
print(np.std(a2c_r1_finish))
    

100%|███████████████████████████████████████| 1000/1000 [19:28<00:00,  1.17s/it]

9.802
6.892807555706165





### R2

In [17]:
# Evaluate the saved A2C model "a2c_r2_best_model.zip": one episode per seed,
# recording our driver's position after reset and at the end of the episode.
# NOTE(review): `randomness` is not passed to gym.make here (the baseline cells
# pass it explicitly), so the environment's default applies -- confirm.
a2c_r2_start, a2c_r2_finish = [], []
agent = A2C.load("a2c_r2_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    a2c_r2_start.append(env_basic.our_driver.position)

    stops = 0
    total_rw = 0
    done = False
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)
        obs = next_obs
        total_rw += reward
        done = terminated or truncated

    a2c_r2_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.mean(a2c_r2_finish))
print(np.std(a2c_r2_finish))
    

100%|███████████████████████████████████████| 1000/1000 [19:37<00:00,  1.18s/it]

10.069
7.132758162169806





### R3 - Trained with Fixed Grid

In [18]:
# Evaluate the saved A2C model "a2c_r3_fixed_grid_best_model.zip" (R3, trained
# with a fixed grid): one episode per seed, recording our driver's position
# after reset and at the end of the episode.
#
# The results live in lists named after this specific model. The
# "R3 - Trained with Random Grid" A2C cell rebinds a2c_r3_start /
# a2c_r3_finish, so storing the fixed-grid results only under those names
# would lose them as soon as that cell runs. The old names are still bound to
# the same lists here so that anything reading them right after this cell
# keeps working.
a2c_r3_fixed_start = a2c_r3_start = []
a2c_r3_fixed_finish = a2c_r3_finish = []
agent = A2C.load("a2c_r3_fixed_grid_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    # NOTE(review): `randomness` is not passed here (the baseline cells pass it
    # explicitly), so the environment's default applies -- confirm.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    a2c_r3_fixed_start.append(env_basic.our_driver.position)

    done = False
    total_rw = 0
    stops = 0
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)

        total_rw += reward
        done = terminated or truncated
        obs = next_obs

    a2c_r3_fixed_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.array(a2c_r3_fixed_finish).mean())
print(np.array(a2c_r3_fixed_finish).std())

100%|███████████████████████████████████████| 1000/1000 [20:49<00:00,  1.25s/it]

7.684
5.507644142462365





### R3 - Trained with Random Grid

In [19]:
# Evaluate the saved A2C model "a2c_r3_random_grid_best_model.zip" (R3, trained
# with a random grid): one episode per seed, recording our driver's position
# after reset and at the end of the episode.
#
# The results are collected in lists named after this specific model.
# a2c_r3_start / a2c_r3_finish are also the names used by the fixed-grid A2C
# cell, so on their own they do not say which model the numbers belong to.
# The old names are still bound to the same lists for backward compatibility.
a2c_r3_random_start = a2c_r3_start = []
a2c_r3_random_finish = a2c_r3_finish = []
agent = A2C.load("a2c_r3_random_grid_best_model.zip")

for seed in tqdm(range(n_seeds)):
    # A new environment is built for each seed, with the agent in control.
    # NOTE(review): `randomness` is not passed here (the baseline cells pass it
    # explicitly), so the environment's default applies -- confirm.
    env_basic = gym.make(
        'F1Env/Basic-v0',
        seed=seed,
        reward_function=RewardFunctionPerPositionAtFinalLapEval(),
        grid_config=GridConfigMixed(),
        control_driver=True,
    )
    obs, info = env_basic.reset()
    a2c_r3_random_start.append(env_basic.our_driver.position)

    done = False
    total_rw = 0
    stops = 0
    while not done:
        # Deterministic action from the loaded policy for the current observation.
        action, _ = agent.predict(obs, deterministic=True)
        if action != 0:
            # Every non-zero action is counted as a stop.
            stops += 1
        next_obs, reward, terminated, truncated, info = env_basic.step(action)

        total_rw += reward
        done = terminated or truncated
        obs = next_obs

    a2c_r3_random_finish.append(env_basic.our_driver.position)

# Mean and standard deviation (NumPy default, ddof=0) of the finishing positions.
print(np.array(a2c_r3_random_finish).mean())
print(np.array(a2c_r3_random_finish).std())

100%|███████████████████████████████████████| 1000/1000 [20:21<00:00,  1.22s/it]

7.208
5.235144315107274



