In [None]:
import gymnasium as gym
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import deque
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Detect whether matplotlib is rendering through an inline (notebook) backend;
# only in that case is IPython's display helper imported.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# Switch matplotlib to interactive mode.
plt.ion()

# Pick the compute device: CUDA when PyTorch reports it available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Integer code for each tyre compound name.
# The three softest compound names share code 1; the string 'nan' maps to 0
# (presumably a stringified missing value -- confirm against the data source).
TYRE_MAP = {
    'HYPERSOFT': 1,
    'ULTRASOFT': 1,
    'SUPERSOFT': 1,
    'SOFT': 2,
    'MEDIUM': 3,
    'HARD': 4,
    'INTERMEDIATE': 5,
    'WET': 6,
    'nan': 0,
}

In [None]:
class FormulaOneRacingEnv(gym.Env):
    """
    A Markov Decision Process model for Formula 1 race strategy.

    Actions (``Discrete(4)``):
        0 -- stay out, 1 -- pit for soft, 2 -- pit for medium, 3 -- pit for hard.
        Every pit action currently has the same effect on the simulated state;
        the chosen compound does not influence the dynamics.

    Observations are a dict of scalars with the keys ``current_lap``,
    ``tire_wear``, ``fuel_level``, ``tire_age``, ``gap_to_front``,
    ``gap_to_behind``, ``position``, ``fcy`` and ``drs_available``.

    NOTE: ``step`` returns the 4-tuple ``(obs, reward, done, info)`` and
    ``reset`` returns only the observation. This differs from gymnasium's
    ``(obs, reward, terminated, truncated, info)`` / ``(obs, info)`` convention
    and is kept because existing callers unpack these shapes.
    """

    MAX_PERCENT = 100   # upper bound for tire wear and fuel level
    GRID_SIZE = 20      # number of race positions (1..GRID_SIZE)

    def __init__(self):
        super().__init__()
        # Race state
        self.lap_count = 70  # Total laps
        self.current_lap = 0
        self.tire_wear = 0  # Tire wear percentage
        self.fuel_level = 100  # Fuel level percentage
        self.tire_age = 0  # Age of the tire in laps
        self.gap_to_front = 0  # Gap in seconds to the car in front
        self.gap_to_behind = 0  # Gap in seconds to the car behind
        self.position = 1  # Current position in the race
        self.fcy = 0  # Full course yellow flag (0: no, 1: yes)
        self.drs_available = 0  # DRS availability (0: no, 1: yes)

        # Action space (0: no pit, 1: pit for soft, 2: pit for medium, 3: pit for hard)
        self.action_space = gym.spaces.Discrete(4)

        self.observation_space = self._build_observation_space()

    def _build_observation_space(self):
        """Build the observation space for the current ``self.lap_count``."""
        return gym.spaces.Dict({
            # current_lap equals lap_count on the terminal step, hence the +1
            'current_lap': gym.spaces.Discrete(self.lap_count + 1),
            'tire_wear': gym.spaces.Box(low=0, high=self.MAX_PERCENT, shape=(1,), dtype=np.float32),
            'fuel_level': gym.spaces.Box(low=0, high=self.MAX_PERCENT, shape=(1,), dtype=np.float32),
            'tire_age': gym.spaces.Discrete(100),
            'gap_to_front': gym.spaces.Box(low=0, high=120, shape=(1,), dtype=np.float32),
            'gap_to_behind': gym.spaces.Box(low=0, high=120, shape=(1,), dtype=np.float32),
            # positions are 1-based (reset samples 1..GRID_SIZE)
            'position': gym.spaces.Discrete(self.GRID_SIZE, start=1),
            'fcy': gym.spaces.Discrete(2),
            'drs_available': gym.spaces.Discrete(2)
        })

    def _get_obs(self):
        """Return the current state as an observation dict of scalars."""
        return {
            'current_lap': self.current_lap,
            'tire_wear': self.tire_wear,
            'fuel_level': self.fuel_level,
            'tire_age': self.tire_age,
            'gap_to_front': self.gap_to_front,
            'gap_to_behind': self.gap_to_behind,
            'position': self.position,
            'fcy': self.fcy,
            'drs_available': self.drs_available
        }

    def step(self, action):
        """
        Advance the race by one lap.

        Args:
            action: 0 to stay out; any other value is treated as a pit stop.

        Returns:
            ``(next_state, reward, done, info)`` where ``next_state`` is the
            observation dict, ``done`` is True once ``current_lap`` reaches
            ``lap_count`` and ``info`` is an empty dict.
        """
        reward = 0

        # Both samples are drawn every step (even on pit laps) so the random
        # stream does not depend on the chosen action.
        tire_degradation = np.random.normal(2, 0.5)
        fuel_consumption = np.random.normal(1.5, 0.2)

        if action == 0:
            # No pit stop: tires age and wear, fuel burns
            self.tire_age += 1
            self.tire_wear += tire_degradation
            self.fuel_level -= fuel_consumption
        else:
            # Pit stop: fresh tires, fuel used in the pit lane, then refuel
            self.tire_wear = 0
            self.tire_age = 0
            self.fuel_level -= 5  # Pit stop fuel usage
            self.fuel_level = min(self.fuel_level + 30, self.MAX_PERCENT)  # Refuel
            reward += -30  # Time penalty for pitting
            # The former "specialty tire" penalty for actions 4/5 was removed:
            # those actions are outside Discrete(4) and could never be selected.

        # Keep percentages inside the bounds declared in the observation space;
        # without this, wear exceeds 100 and fuel can go negative on long stints.
        self.tire_wear = float(np.clip(self.tire_wear, 0, self.MAX_PERCENT))
        self.fuel_level = float(np.clip(self.fuel_level, 0, self.MAX_PERCENT))

        # Update lap and check for race end
        self.current_lap += 1
        done = self.current_lap >= self.lap_count

        # Reward: remaining fuel minus tire wear (floored at 0), minus gap to the car ahead
        reward += max(0, self.MAX_PERCENT - self.tire_wear - (self.MAX_PERCENT - self.fuel_level))
        reward += -abs(self.gap_to_front)  # Minimize the gap to the front car

        return self._get_obs(), reward, done, {}

    def reset(self, lap_count=None):
        """
        Start a new race and return the initial observation dict.

        Args:
            lap_count: total number of laps for the new race. When omitted,
                the current ``self.lap_count`` is kept.

        Raises:
            ValueError: if ``lap_count`` is given and is smaller than 1.
        """
        if lap_count is not None:
            if lap_count < 1:
                raise ValueError(f"lap_count must be >= 1, got {lap_count}")
            self.lap_count = lap_count
            # The lap bound is part of the observation space, so refresh it
            self.observation_space = self._build_observation_space()

        self.current_lap = 0
        self.tire_wear = 0
        self.fuel_level = 100
        self.tire_age = 0
        self.gap_to_front = np.random.uniform(0, 10)
        self.gap_to_behind = np.random.uniform(0, 10)
        self.position = np.random.randint(1, self.GRID_SIZE + 1)
        self.fcy = np.random.choice([0, 1])
        self.drs_available = np.random.choice([0, 1])
        return self._get_obs()

    def render(self, mode='human'):
        """Rendering is not implemented; this is a no-op."""
        pass

In [None]:
import torch
import torch.nn as nn

class QNetwork(nn.Module):
    """
    Two-headed MLP: a shared two-layer ReLU trunk feeding two linear heads.

    Args:
        state_dim: size of the input feature vector.
        num_tire_types: number of outputs of the tire head.
        max_laps: number of outputs of the lap head.
        hidden_dim: width of both shared hidden layers.
    """

    def __init__(self, state_dim, num_tire_types, max_laps, hidden_dim=64):
        super().__init__()
        # Shared trunk
        self.fc1 = nn.Linear(in_features=state_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)

        # Head producing one value per tire type
        self.tire_out = nn.Linear(in_features=hidden_dim, out_features=num_tire_types)

        # Head producing one value per lap index
        self.lap_out = nn.Linear(in_features=hidden_dim, out_features=max_laps)

    def forward(self, state):
        """Return ``(tire_type, lap_number)``: the raw outputs of both heads."""
        features = state
        for shared_layer in (self.fc1, self.fc2):
            features = torch.relu(shared_layer(features))

        return self.tire_out(features), self.lap_out(features)

In [None]:
class DQNAgent:
    """
    Epsilon-greedy DQN agent with a replay buffer and a single Q-network.

    The agent accepts states either as flat numeric sequences/arrays or as
    observation dicts (whose values are concatenated in insertion order).
    ``QNetwork`` returns two heads; the first head, which has ``action_dim``
    outputs, is used as the action-value estimate.

    Args:
        state_dim: length of the flattened state vector.
        action_dim: number of discrete actions.
        lr: Adam learning rate.
        gamma: discount factor.
        buffer_size: maximum number of transitions kept in replay memory.
        batch_size: number of transitions sampled per ``replay`` call.
        max_laps: output size of the network's second (lap) head, which
            ``QNetwork`` requires but this agent does not train on.
    """

    def __init__(self, state_dim, action_dim, lr=1e-4, gamma=0.99, buffer_size=10000, batch_size=64,
                 max_laps=70):
        self.action_dim = action_dim
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.gamma = gamma

        # QNetwork takes (state_dim, num_tire_types, max_laps); omitting
        # max_laps raised a TypeError.
        self.q_network = QNetwork(state_dim, action_dim, max_laps)
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    @staticmethod
    def _to_vector(state):
        """Flatten a state (dict of scalars/arrays, or array-like) to a 1-D float32 array."""
        if isinstance(state, dict):
            return np.concatenate(
                [np.ravel(np.asarray(value, dtype=np.float32)) for value in state.values()]
            )
        return np.ravel(np.asarray(state, dtype=np.float32))

    def _q_values(self, states):
        """Return per-action Q-values of shape (batch, action_dim) for a batch of states."""
        output = self.q_network(states)
        # The network returns (tire_head, lap_head); only the first is action-valued.
        if isinstance(output, (tuple, list)):
            return output[0]
        return output

    def add_to_memory(self, state, action, reward, next_state, done):
        """Store one transition; states are flattened so ``replay`` can batch them."""
        self.memory.append((
            self._to_vector(state),
            int(action),
            float(reward),
            self._to_vector(next_state),
            float(done),
        ))

    def act(self, state, epsilon=0.1):
        """Return a random action with probability ``epsilon``, else the greedy action."""
        if random.random() > epsilon:
            state_tensor = torch.tensor(self._to_vector(state), dtype=torch.float32).unsqueeze(0)
            # Inference only: no autograd graph is needed to pick an action
            with torch.no_grad():
                q_values = self._q_values(state_tensor)
            action = q_values.max(1)[1].item()
        else:
            action = random.randint(0, self.action_dim - 1)
        return action

    def replay(self):
        """Run one gradient step on a random minibatch; no-op until the buffer holds a full batch."""
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        # Stack through numpy first: building a tensor from a tuple of arrays is slow
        states = torch.tensor(np.stack(states), dtype=torch.float32)
        actions = torch.tensor(actions, dtype=torch.long)
        rewards = torch.tensor(rewards, dtype=torch.float32)
        next_states = torch.tensor(np.stack(next_states), dtype=torch.float32)
        dones = torch.tensor(dones, dtype=torch.float32)

        current_q = self._q_values(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        # The bootstrap target must be treated as a constant; otherwise the
        # loss also back-propagates through the next-state estimate.
        with torch.no_grad():
            next_q = self._q_values(next_states).max(1)[0]
            expected_q = rewards + self.gamma * next_q * (1 - dones)

        loss = self.criterion(current_q, expected_q)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

In [None]:
# Seed every random source used by the environment and the agent so a
# Restart & Run All reproduces the same training curve.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

LAPS_PER_RACE = 70


def state_to_vector(state):
    """Flatten an observation dict into a float32 feature vector (insertion order)."""
    return np.array(list(state.values()), dtype=np.float32)


env = FormulaOneRacingEnv()
# Derive network sizes from the environment instead of hard-coding them:
# one scalar feature per observation key, one output per discrete action.
agent = DQNAgent(
    state_dim=len(env.observation_space.spaces),
    action_dim=int(env.action_space.n),
)

episodes = 500
for episode in range(episodes):
    state = state_to_vector(env.reset(LAPS_PER_RACE))
    total_reward = 0
    done = False
    while not done:
        action = agent.act(state)
        next_obs, reward, done, _ = env.step(action)
        next_state = state_to_vector(next_obs)
        # Store vectors, not dicts: replay() batches transitions into tensors
        agent.add_to_memory(state, action, reward, next_state, done)
        agent.replay()
        state = next_state
        total_reward += reward
    print(f"Episode: {episode + 1}, Total reward: {total_reward}")