In [12]:
from models.reinforcement_learning import *

In [13]:
# Track layout file, given relative to the notebook's working directory.
track_file = 'datasets/L-track.txt'
# Build the track object from that file; `racetrack` is handed to Simulator in a later cell.
racetrack = Racetrack(track_file)

In [14]:
# Environment shared by every algorithm in this notebook.
# NOTE(review): 'nearest' is presumably the crash-handling mode — confirm against Simulator.
simulator = Simulator(racetrack, 'nearest')

# Plan with value iteration. gamma and theta are passed straight through;
# presumably the discount factor and the convergence threshold — confirm in ValueIteration.
value_iteration = ValueIteration(simulator, gamma=0.99, theta=0.1)
value_iteration.run()

# Keep the result under its own name: the Q-learning and SARSA cells further down
# rebind `optimal_policy`, which would otherwise make this policy unrecoverable
# without re-running value iteration.
vi_policy = value_iteration.get_policy()
optimal_policy = vi_policy  # generic name read by the cells that follow


In [15]:
# Show a sample of the policy rather than every state: the full mapping has one
# entry per (x, y, vx, vy) state and floods the cell output.
MAX_ROWS_SHOWN = 25

for state, action in list(optimal_policy.items())[:MAX_ROWS_SHOWN]:
    print(f"State: {state}, Action: {action}")

# Make the truncation explicit so the listing is not mistaken for the whole policy.
rows_omitted = len(optimal_policy) - MAX_ROWS_SHOWN
if rows_omitted > 0:
    print(f"... ({rows_omitted} more states not shown)")

State: (1, 6, -5, -5), Action: (-1, -1)
State: (1, 6, -5, -4), Action: (-1, -1)
State: (1, 6, -5, -3), Action: (-1, -1)
State: (1, 6, -5, -2), Action: (-1, -1)
State: (1, 6, -5, -1), Action: (-1, -1)
State: (1, 6, -5, 0), Action: (-1, -1)
State: (1, 6, -5, 1), Action: (-1, -1)
State: (1, 6, -5, 2), Action: (-1, -1)
State: (1, 6, -5, 3), Action: (-1, -1)
State: (1, 6, -5, 4), Action: (-1, -1)
State: (1, 6, -5, 5), Action: (-1, -1)
State: (1, 6, -4, -5), Action: (-1, -1)
State: (1, 6, -4, -4), Action: (-1, -1)
State: (1, 6, -4, -3), Action: (-1, -1)
State: (1, 6, -4, -2), Action: (-1, -1)
State: (1, 6, -4, -1), Action: (-1, -1)
State: (1, 6, -4, 0), Action: (-1, -1)
State: (1, 6, -4, 1), Action: (-1, -1)
State: (1, 6, -4, 2), Action: (-1, -1)
State: (1, 6, -4, 3), Action: (-1, -1)
State: (1, 6, -4, 4), Action: (-1, -1)
State: (1, 6, -4, 5), Action: (-1, -1)
State: (1, 6, -3, -5), Action: (-1, -1)
State: (1, 6, -3, -4), Action: (-1, -1)
State: (1, 6, -3, -3), Action: (-1, -1)
State: (1, 6

In [16]:
# Roll out the policy currently bound to `optimal_policy` (the value-iteration
# result when the notebook is run top to bottom).
# The previously computed `initial_state` tuple was dropped: it was never passed to
# simulate_with_policy, so it had no effect on where the rollout starts.
steps_taken = simulator.simulate_with_policy(optimal_policy)
print(f"Total steps taken to reach the finish: {steps_taken}")


Step 0: State (1, 8, 0, 0) -> Action (1, -1) -> New State (2, 7, 1, -1), Reward -1
Step 1: State (2, 7, 1, -1) -> Action (1, 1) -> New State (4, 7, 2, 0), Reward -1
Step 2: State (4, 7, 2, 0) -> Action (1, -1) -> New State (7, 6, 3, -1), Reward -1
Step 3: State (7, 6, 3, -1) -> Action (1, 1) -> New State (8, 6, 0, 0), Reward -1
Step 4: State (8, 6, 0, 0) -> Action (1, 0) -> New State (9, 6, 1, 0), Reward -1
Step 5: State (9, 6, 1, 0) -> Action (0, -1) -> New State (10, 6, 1, 0), Reward -1
Step 6: State (10, 6, 1, 0) -> Action (1, 1) -> New State (12, 7, 2, 1), Reward -1
Step 7: State (12, 7, 2, 1) -> Action (1, 0) -> New State (15, 8, 3, 1), Reward -1
Step 8: State (15, 8, 3, 1) -> Action (1, 0) -> New State (18, 9, 3, 1), Reward -1
Step 9: State (18, 9, 3, 1) -> Action (1, -1) -> New State (22, 9, 4, 0), Reward -1
Step 10: State (22, 9, 4, 0) -> Action (-1, 0) -> New State (25, 9, 3, 0), Reward -1
Step 11: State (25, 9, 3, 0) -> Action (0, -1) -> New State (28, 8, 3, -1), Reward -1
St

In [17]:
# Train a Q-learning agent on the shared simulator.
# NOTE(review): no RNG seed is set in the visible cells; with epsilon=0.4 training is
# presumably epsilon-greedy and therefore not reproducible run to run — TODO seed the
# RNG that models.reinforcement_learning uses.
q_learning = QLearning(simulator, alpha=0.1, gamma=0.99, epsilon=0.4)
num_episodes = 5000
q_learning.train(num_episodes)

# Keep the learned policy under its own name: the SARSA cell further down rebinds
# `optimal_policy`, so this result would otherwise be lost without retraining.
q_policy = q_learning.get_policy()
optimal_policy = q_policy  # generic name read by the rollout cell that follows

In [30]:
# Roll out the policy currently bound to `optimal_policy`.
# NOTE(review): that name is rebound by several training cells in this notebook, so
# this rollout reflects whichever of them ran most recently — run the cells strictly
# in order for it to be the Q-learning policy.
# The previously computed `initial_state` tuple was dropped: it was never passed to
# simulate_with_policy, so it had no effect on where the rollout starts.
steps_taken = simulator.simulate_with_policy(optimal_policy)
print(f"Total steps taken to reach the finish: {steps_taken}")

Step 0: State (1, 8, 0, 0) -> Action (1, 0) -> New State (2, 8, 1, 0), Reward -1
Step 1: State (2, 8, 1, 0) -> Action (1, 0) -> New State (3, 8, 1, 0), Reward -1
Step 2: State (3, 8, 1, 0) -> Action (1, 0) -> New State (5, 8, 2, 0), Reward -1
Step 3: State (5, 8, 2, 0) -> Action (1, 0) -> New State (8, 8, 3, 0), Reward -1
Step 4: State (8, 8, 3, 0) -> Action (1, 0) -> New State (12, 8, 4, 0), Reward -1
Step 5: State (12, 8, 4, 0) -> Action (0, 0) -> New State (16, 8, 4, 0), Reward -1
Step 6: State (16, 8, 4, 0) -> Action (-1, 0) -> New State (19, 8, 3, 0), Reward -1
Step 7: State (19, 8, 3, 0) -> Action (-1, 0) -> New State (21, 8, 2, 0), Reward -1
Step 8: State (21, 8, 2, 0) -> Action (0, 1) -> New State (23, 8, 2, 0), Reward -1
Step 9: State (23, 8, 2, 0) -> Action (-1, 0) -> New State (25, 8, 2, 0), Reward -1
Step 10: State (25, 8, 2, 0) -> Action (1, -1) -> New State (28, 7, 3, -1), Reward -1
Step 11: State (28, 7, 3, -1) -> Action (-1, 1) -> New State (30, 7, 2, 0), Reward -1
Step

In [19]:
# Train a SARSA agent on the shared simulator.
# The agent gets its own name instead of reusing `q_learning`, so the Q-learning
# agent trained earlier is not silently replaced by an object of a different class.
# NOTE(review): no RNG seed is set in the visible cells; with epsilon=0.4 training is
# presumably epsilon-greedy and therefore not reproducible run to run — TODO seed the
# RNG that models.reinforcement_learning uses.
sarsa_agent = SARSA(simulator, alpha=0.1, gamma=0.99, epsilon=0.4)
num_episodes = 5000
sarsa_agent.train(num_episodes)

# Keep the learned policy under its own name as well as the generic one.
sarsa_policy = sarsa_agent.get_policy()
optimal_policy = sarsa_policy  # generic name read by the rollout cell that follows


In [43]:
# Roll out the policy currently bound to `optimal_policy` (the SARSA result when the
# notebook is run top to bottom).
# The previously computed `initial_state` tuple was dropped: it was never passed to
# simulate_with_policy, so it had no effect on where the rollout starts.
steps_taken = simulator.simulate_with_policy(optimal_policy)
print(f"Total steps taken to reach the finish: {steps_taken}")

Step 0: State (1, 6, 0, 0) -> Action (1, 0) -> New State (2, 6, 1, 0), Reward -1
Step 1: State (2, 6, 1, 0) -> Action (1, 0) -> New State (4, 6, 2, 0), Reward -1
Step 2: State (4, 6, 2, 0) -> Action (1, 1) -> New State (7, 7, 3, 1), Reward -1
Step 3: State (7, 7, 3, 1) -> Action (1, -1) -> New State (11, 7, 4, 0), Reward -1
Step 4: State (11, 7, 4, 0) -> Action (1, -1) -> New State (16, 6, 5, -1), Reward -1
Step 5: State (16, 6, 5, -1) -> Action (0, -1) -> New State (17, 6, 0, 0), Reward -1
Step 6: State (17, 6, 0, 0) -> Action (1, 1) -> New State (18, 7, 1, 1), Reward -1
Step 7: State (18, 7, 1, 1) -> Action (0, 0) -> New State (19, 8, 1, 1), Reward -1
Step 8: State (19, 8, 1, 1) -> Action (0, -1) -> New State (20, 8, 1, 0), Reward -1
Step 9: State (20, 8, 1, 0) -> Action (1, 0) -> New State (22, 8, 2, 0), Reward -1
Step 10: State (22, 8, 2, 0) -> Action (1, 0) -> New State (24, 8, 2, 0), Reward -1
Step 11: State (24, 8, 2, 0) -> Action (0, -1) -> New State (26, 7, 2, -1), Reward -1
S