In [None]:
import numpy as np
import math
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
from collections import deque
import random

# --- 1. Environment and Simulation ---

# Constants
# (x, y) position the robot has to reach.
GOAL_POS = (25.0, 25.0)
# Forward speed; Robot.move advances by ROBOT_SPEED * DT along the heading each step.
ROBOT_SPEED = 1.0
# Largest turn rate magnitude (radians per unit time); commands are clipped to +/- this value.
MAX_TURN_RATE = np.deg2rad(45)
# Integration time step used by Robot.move.
DT = 0.1
# Upper bound on the number of simulation steps in one episode.
MAX_EPISODE_STEPS = 500
# An episode succeeds once the robot is closer than this to GOAL_POS.
GOAL_RADIUS = 1.0
SEQUENCE_LENGTH = 5 # How many past steps the LSTM should remember

# Obstacles represented as rectangles [x_min, y_min, x_max, y_max]
# They are read by the simulated lidar and drawn in the final plot; the code
# visible in this file performs no collision check against them.
OBSTACLES = [
    [10, 0, 12, 15],
    [10, 20, 12, 30],
    [18, 10, 20, 25]
]

class Robot:
    """Constant-speed planar robot whose only control input is a turn rate."""

    def __init__(self, x, y, theta):
        """Store the initial position (x, y) and heading theta (radians)."""
        self.x, self.y, self.theta = x, y, theta

    def move(self, omega):
        """Advance the robot by one time step.

        Args:
            omega: Commanded turn rate; clipped to +/- MAX_TURN_RATE.

        Returns:
            ((x, y), theta) after the step.
        """
        # Apply the bounded turn first, then wrap the heading into [-pi, pi].
        bounded_omega = np.clip(omega, -MAX_TURN_RATE, MAX_TURN_RATE)
        heading = self.theta + bounded_omega * DT
        heading = math.atan2(math.sin(heading), math.cos(heading))
        self.theta = heading

        # Translate along the *updated* heading.
        self.x += ROBOT_SPEED * math.cos(heading) * DT
        self.y += ROBOT_SPEED * math.sin(heading) * DT

        position = (self.x, self.y)
        return position, self.theta

def get_lidar_readings(robot_pos, robot_theta, num_sensors=5, max_dist=10.0, obstacles=None):
    """Simulate `num_sensors` lidar rays spread over a 180-degree arc.

    The rays are evenly spaced from -90 to +90 degrees relative to the robot
    heading. Each ray is marched outwards in 0.1 increments and stops at the
    first sample that falls inside an obstacle rectangle.

    Args:
        robot_pos: (x, y) position of the robot.
        robot_theta: Robot heading in radians.
        num_sensors: Number of rays to cast.
        max_dist: Value reported by a ray that hits nothing; also the upper
            bound of the sampled distances.
        obstacles: Iterable of [x_min, y_min, x_max, y_max] rectangles.
            Defaults to the module-level OBSTACLES map when None.

    Returns:
        NumPy array of length `num_sensors` with the measured distances.
    """
    if obstacles is None:
        obstacles = OBSTACLES

    readings = np.full(num_sensors, max_dist)
    angles = np.linspace(-np.pi/2, np.pi/2, num_sensors) + robot_theta
    # The sample grid is identical for every ray, so build it once.
    sample_dists = np.arange(0.1, max_dist, 0.1)
    origin_x, origin_y = robot_pos[0], robot_pos[1]

    for i, angle in enumerate(angles):
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        for dist in sample_dists:
            px = origin_x + dist * cos_a
            py = origin_y + dist * sin_a
            # Stop at the first sample inside any rectangle (edges included).
            if any(obs[0] <= px <= obs[2] and obs[1] <= py <= obs[3] for obs in obstacles):
                readings[i] = dist
                break
    return readings

def get_state(robot_pos, robot_theta):
    """Build the 7-element observation fed to the policies.

    Layout: five lidar readings divided by 10, the distance to the goal
    divided by 30, and the heading error towards the goal divided by pi.

    Args:
        robot_pos: (x, y) position of the robot.
        robot_theta: Robot heading in radians.

    Returns:
        1-D NumPy array: [lidar_0..lidar_4, goal_distance, goal_heading_error].
    """
    # Goal offset expressed in world coordinates.
    goal_dx = GOAL_POS[0] - robot_pos[0]
    goal_dy = GOAL_POS[1] - robot_pos[1]
    goal_range = math.sqrt(goal_dx**2 + goal_dy**2)

    # Bearing to the goal relative to the current heading, wrapped to [-pi, pi].
    bearing_error = math.atan2(goal_dy, goal_dx) - robot_theta
    wrapped_error = math.atan2(math.sin(bearing_error), math.cos(bearing_error))

    # Scale everything down so the network sees small-magnitude inputs.
    scaled_lidar = get_lidar_readings(robot_pos, robot_theta) / 10.0
    goal_features = [goal_range / 30.0, wrapped_error / np.pi]
    return np.concatenate((scaled_lidar, goal_features))
    
# --- 2. The Expert and The Learner Models ---

def apf_expert_policy(state):
    """Artificial-potential-field style expert: steer to the goal, away from obstacles.

    Args:
        state: 7-element NumPy array laid out as
            [5 lidar readings / 10, goal distance / 30, goal heading error / pi].

    Returns:
        Turn rate clipped to [-MAX_TURN_RATE, MAX_TURN_RATE].

    Note:
        The centre sensor has a turn direction of 0, so an obstacle seen only
        straight ahead contributes no repulsion; the expert can get stuck.
    """
    lidar_readings = state[:5] * 10.0
    angle_to_goal = state[6] * np.pi

    # Give more weight to front sensors.
    weights = (0.1, 0.3, 1.0, 0.3, 0.1)
    # Direction to turn AWAY from each sensor. Sensor 0 looks -90 degrees
    # (right), so the escape turn is positive (left), and so on.
    turn_directions = (np.pi/2, np.pi/4, 0.0, -np.pi/4, -np.pi/2)

    # Repulsive term from obstacles closer than 3.0, inversely proportional
    # to distance (floored at 0.1 to avoid dividing by ~0).
    repulsive_omega = 0.0
    for reading, weight, turn_away in zip(lidar_readings, weights, turn_directions):
        if reading < 3.0:
            repulsive_omega += weight * (1.0 / max(reading, 0.1)) * turn_away

    # Attractive term towards the goal.
    attractive_omega = 2.0 * angle_to_goal

    # turn_directions already point away from the obstacle, so the repulsive
    # term is ADDED. Subtracting it would steer the robot into the obstacle.
    total_omega = attractive_omega + repulsive_omega
    return np.clip(total_omega, -MAX_TURN_RATE, MAX_TURN_RATE)

def create_lstm_model(sequence_length, n_inputs, n_outputs):
    """Build and compile the stacked-LSTM policy network.

    Args:
        sequence_length: Number of time steps in each input sequence.
        n_inputs: Number of features per time step.
        n_outputs: Number of output units.

    Returns:
        A compiled Keras Sequential model (Adam optimizer, MSE loss). The
        model summary is printed as a side effect.
    """
    layer_stack = [
        Input(shape=(sequence_length, n_inputs)),
        # First LSTM returns the full sequence so the second one can consume it.
        LSTM(64, return_sequences=True),
        LSTM(64),
        Dense(32, activation='relu'),
        # tanh keeps the output in [-1, 1], i.e. a normalized turn rate.
        Dense(n_outputs, activation='tanh'),
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer='adam', loss='mean_squared_error')
    network.summary()
    return network

def run_episode(start_pos, start_theta, policy_model, use_exploration=False, epsilon=0.0):
    """Run a single episode with either the APF expert or a Keras model.

    Args:
        start_pos: (x, y) starting position.
        start_theta: Starting heading in radians.
        policy_model: The string "expert" to use apf_expert_policy, otherwise
            an object with a Keras-style predict() taking an input of shape
            (1, SEQUENCE_LENGTH, n_features).
        use_exploration: If True, model actions are perturbed with
            probability `epsilon`.
        epsilon: Probability of adding uniform noise in [-0.5, 0.5] to the
            normalized model action.

    Returns:
        (path_taken, success, episode_sequences, episode_actions) where
        path_taken is the list of visited (x, y) positions, success tells
        whether the goal was reached within MAX_EPISODE_STEPS, and the last
        two lists pair each state window with the normalized action taken.
    """
    robot = Robot(x=start_pos[0], y=start_pos[1], theta=start_theta)
    path_taken = [start_pos]
    state_history = deque(maxlen=SEQUENCE_LENGTH)

    # Data for training
    episode_sequences = []
    episode_actions = []

    for _ in range(MAX_EPISODE_STEPS):
        # Get current state and update history
        current_state = get_state((robot.x, robot.y), robot.theta)
        state_history.append(current_state)

        if len(state_history) < SEQUENCE_LENGTH:
            # Not enough history yet: move straight-ish. These warm-up steps
            # are not recorded as training samples.
            omega = 0.1 * (random.random() - 0.5)
        else:
            sequence = np.array(state_history)

            if policy_model == "expert":
                # The expert is stateless, so it only needs the latest state.
                omega = apf_expert_policy(current_state)
            else:  # It's a Keras model
                # Add the batch axis: (1, sequence_length, n_features).
                nn_input = sequence[np.newaxis, ...]
                # Model outputs a value in [-1, 1]
                normalized_omega = policy_model.predict(nn_input, verbose=0)[0][0]

                if use_exploration and random.random() < epsilon:
                    # Explore by adding random noise
                    normalized_omega += random.uniform(-0.5, 0.5)

                # Noise can push the command outside [-1, 1]. Robot.move would
                # clip the executed turn rate anyway, so clip here to keep the
                # recorded label equal to the action actually executed and
                # inside the range of the model's tanh output.
                normalized_omega = float(np.clip(normalized_omega, -1.0, 1.0))

                # Scale it to the robot's max turn rate
                omega = normalized_omega * MAX_TURN_RATE

            # Store data for training
            episode_sequences.append(sequence)
            episode_actions.append(omega / MAX_TURN_RATE)  # Store normalized action

        new_pos, _ = robot.move(omega)
        path_taken.append(new_pos)

        # Check for success
        if math.dist(new_pos, GOAL_POS) < GOAL_RADIUS:
            return path_taken, True, episode_sequences, episode_actions

    return path_taken, False, episode_sequences, episode_actions


# --- 3. Main Execution Block ---

if __name__ == "__main__":
    # End-to-end pipeline: collect expert demonstrations, pre-train an LSTM on
    # them (LSTM_FT), fine-tune a copy on its own best rollouts (LSTM_FTR),
    # then plot the paths of all three policies from a common start.

    # =========================================================================
    # PART 1: Generate Training Data with the "Expert" (APF)
    # =========================================================================
    print("\n--- Part 1: Generating Data with APF Expert ---")
    NUM_EXPERT_EPISODES = 50
    all_expert_sequences = []
    all_expert_actions = []

    for i in range(NUM_EXPERT_EPISODES):
        print(f"Running expert episode {i+1}/{NUM_EXPERT_EPISODES}")
        start_pos = (random.uniform(0, 5), random.uniform(0, 5))
        start_theta = random.uniform(-np.pi, np.pi)
        _, success, sequences, actions = run_episode(start_pos, start_theta, "expert")
        # Only demonstrations that actually reached the goal are kept.
        if success:
            all_expert_sequences.extend(sequences)
            all_expert_actions.extend(actions)

    X_train_expert = np.array(all_expert_sequences)
    y_train_expert = np.array(all_expert_actions)
    print(f"Generated {X_train_expert.shape[0]} training samples from successful expert episodes.")

    # Fail with a clear message instead of an obscure Keras error on empty data.
    if X_train_expert.shape[0] == 0:
        raise RuntimeError("No expert episode reached the goal; there is no data to pre-train on.")

    # =========================================================================
    # PART 2: Pre-train the LSTM model (Creating LSTM_FT)
    # =========================================================================
    print("\n--- Part 2: Pre-training the LSTM_FT model ---")
    lstm_ft_model = create_lstm_model(SEQUENCE_LENGTH, 7, 1)
    lstm_ft_model.fit(X_train_expert, y_train_expert, epochs=20, batch_size=32, validation_split=0.2)

    # =========================================================================
    # PART 3: Fine-tune with Reinforcement Learning (Creating LSTM_FTR)
    # =========================================================================
    print("\n--- Part 3: Fine-tuning to create LSTM_FTR model ---")
    # Make a copy to fine-tune, preserving the original FT model.
    # clone_model copies only the architecture, so weights are copied explicitly.
    lstm_ftr_model = tf.keras.models.clone_model(lstm_ft_model)
    lstm_ftr_model.set_weights(lstm_ft_model.get_weights())
    lstm_ftr_model.compile(optimizer='adam', loss='mean_squared_error')

    NUM_RL_EPOCHS = 10
    EPISODES_PER_EPOCH = 20
    epsilon = 0.5 # Initial exploration rate

    for epoch in range(NUM_RL_EPOCHS):
        print(f"\nRL Epoch {epoch+1}/{NUM_RL_EPOCHS}, Epsilon: {epsilon:.2f}")
        best_episodes_data = []

        for i in range(EPISODES_PER_EPOCH):
            start_pos = (random.uniform(0, 5), random.uniform(0, 5))
            start_theta = random.uniform(-np.pi, np.pi)
            path, success, sequences, actions = run_episode(start_pos, start_theta, lstm_ftr_model, use_exploration=True, epsilon=epsilon)

            if success:
                # Reward is inversely proportional to path length (shorter is better)
                reward = 100 / len(path)
                best_episodes_data.append({'reward': reward, 'sequences': sequences, 'actions': actions})

        if not best_episodes_data:
            print("No successful paths in this RL epoch, skipping training.")
            continue

        # Sort episodes by reward and take the top 50%. Always keep at least
        # one: with a single success, len // 2 == 0 would discard it.
        best_episodes_data.sort(key=lambda x: x['reward'], reverse=True)
        top_episodes = best_episodes_data[:max(1, len(best_episodes_data)//2)]

        # Collect data from only the best episodes for fine-tuning
        X_fine_tune = []
        y_fine_tune = []
        for episode_data in top_episodes:
            X_fine_tune.extend(episode_data['sequences'])
            y_fine_tune.extend(episode_data['actions'])

        if X_fine_tune:
            print(f"Fine-tuning on {len(X_fine_tune)} samples from the best paths...")
            lstm_ftr_model.fit(np.array(X_fine_tune), np.array(y_fine_tune), epochs=5, batch_size=16, verbose=0)

        # Decrease exploration rate over time
        epsilon = max(0.1, epsilon * 0.9)

    # =========================================================================
    # PART 4: Final Evaluation and Comparison
    # =========================================================================
    print("\n--- Part 4: Evaluating all three models ---")
    plt.figure(figsize=(12, 12))

    # Plot obstacles and goal
    for obs in OBSTACLES:
        plt.gca().add_patch(plt.Rectangle((obs[0], obs[1]), obs[2]-obs[0], obs[3]-obs[1], color='gray'))
    plt.plot(GOAL_POS[0], GOAL_POS[1], 'r*', markersize=20, label='Goal')

    # Test all three models from the same starting point
    eval_start_pos = (2, 2)
    eval_start_theta = np.pi / 4

    # 1. Expert Path
    path_expert, _, _, _ = run_episode(eval_start_pos, eval_start_theta, "expert")
    path_expert_np = np.array(path_expert)
    plt.plot(path_expert_np[:, 0], path_expert_np[:, 1], 'b--', label=f'Expert (APF) Path - {len(path_expert)} steps')

    # 2. LSTM_FT Path
    path_ft, _, _, _ = run_episode(eval_start_pos, eval_start_theta, lstm_ft_model)
    path_ft_np = np.array(path_ft)
    plt.plot(path_ft_np[:, 0], path_ft_np[:, 1], 'g-.', label=f'LSTM_FT Path - {len(path_ft)} steps')

    # 3. LSTM_FTR Path
    path_ftr, _, _, _ = run_episode(eval_start_pos, eval_start_theta, lstm_ftr_model)
    path_ftr_np = np.array(path_ftr)
    plt.plot(path_ftr_np[:, 0], path_ftr_np[:, 1], 'm-', linewidth=2.5, label=f'LSTM_FTR Path (Final) - {len(path_ftr)} steps')

    plt.title("Comparison of Expert, LSTM_FT, and LSTM_FTR Models")
    plt.xlabel("X Position")
    plt.ylabel("Y Position")
    plt.legend()
    plt.grid(True)
    plt.axis('equal')
    plt.show()


--- Part 1: Generating Data with APF Expert ---
Running expert episode 1/50
Running expert episode 2/50
Running expert episode 3/50
Running expert episode 4/50
Running expert episode 5/50
Running expert episode 6/50
Running expert episode 7/50
Running expert episode 8/50
Running expert episode 9/50
Running expert episode 10/50
Running expert episode 11/50
Running expert episode 12/50
Running expert episode 13/50
Running expert episode 14/50
Running expert episode 15/50
Running expert episode 16/50
Running expert episode 17/50
Running expert episode 18/50
Running expert episode 19/50
Running expert episode 20/50
Running expert episode 21/50
Running expert episode 22/50
Running expert episode 23/50
Running expert episode 24/50
Running expert episode 25/50
Running expert episode 26/50
Running expert episode 27/50
Running expert episode 28/50
Running expert episode 29/50
Running expert episode 30/50
Running expert episode 31/50
Running expert episode 32/50
Running expert episode 33/50
Run

Epoch 1/20
[1m419/419[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - loss: 0.0951 - val_loss: 0.0265
Epoch 2/20
[1m419/419[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 0.0257 - val_loss: 0.0083
Epoch 3/20
[1m419/419[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - loss: 0.0099 - val_loss: 0.0067
Epoch 4/20
[1m419/419[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - loss: 0.0078 - val_loss: 0.0038
Epoch 5/20
[1m419/419[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 0.0064 - val_loss: 0.0041
Epoch 6/20
[1m419/419[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 0.0067 - val_loss: 0.0033
Epoch 7/20
[1m419/419[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.0052 - val_loss: 0.0038
Epoch 8/20
[1m419/419[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - loss: 0.0056 - val_loss: 0.0030
Epoch 9/20
[1m419/419[0m [32m━━━━━━