In [1]:
import os

# Create the working directories this notebook uses. exist_ok=True keeps the
# cell safe to re-run.
# NOTE(review): the recorded training run failed while opening
# '..\models\dqn_pid_tuner.weights.h5', i.e. a path under the PARENT directory.
# Confirm that the directories created here match the paths src/train.py
# actually writes to.
for directory in ('src', 'models', 'results'):
    os.makedirs(directory, exist_ok=True)

# Ensure `src` is an importable package. Append mode creates the file when it
# is missing but, unlike mode 'w', never truncates an existing __init__.py.
with open('src/__init__.py', 'a'):
    pass

In [2]:
# Install the packages listed in requirements.txt using the %pip magic.
# As the recorded output notes, the kernel may need a restart afterwards
# before updated packages can be used.
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [3]:
import sys
import time
import traceback

import numpy as np
import pandas as pd

In [4]:
# Make the project root (the current working directory) importable so that
# `from src.<module> import ...` resolves.
# The membership test must look for the same value that gets appended;
# otherwise every re-run of this cell adds another duplicate entry.
project_root = os.getcwd()
if project_root not in sys.path:
    sys.path.append(project_root)

In [5]:
from src.train import train_dqn_pid_tuner

In [6]:
print("\n--- Running Training Script (src/train.py) ---\n")
start_time = time.time()

try:
    # This function executes the DQN training loop.
    train_dqn_pid_tuner()
except Exception as e:
    # Best-effort: keep the notebook running, but show the exception type and
    # full traceback as well. The message alone (str(e)) is often not enough
    # to locate the failure.
    print(f"Training failed with error: {e}")
    traceback.print_exc()
else:
    # Elapsed wall-clock time is reported only for a run that completed.
    end_time = time.time()
    print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")


--- Running Training Script (src/train.py) ---

--- Starting DQN Training for PID Auto-Tuning ---


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Ep:   10/500 | Reward:  -1126.21 | Avg R (10): -2286.45 | Kp/Kd: 5.50/4.00 | Epsilon: 0.9970
Ep:   20/500 | Reward:  -1102.55 | Avg R (10): -1124.56 | Kp/Kd: 4.50/4.00 | Epsilon: 0.9940
Ep:   30/500 | Reward:  -1086.48 | Avg R (10): -1097.62 | Kp/Kd: 3.50/4.00 | Epsilon: 0.9910
Ep:   40/500 | Reward:  -1090.58 | Avg R (10): -1093.80 | Kp/Kd: 2.50/5.50 | Epsilon: 0.9881
Ep:   50/500 | Reward:  -1092.62 | Avg R (10): -1094.66 | Kp/Kd: 2.00/6.00 | Epsilon: 0.9851
Ep:   60/500 | Reward:  -1109.16 | Avg R (10): -1096.57 | Kp/Kd: 2.50/7.00 | Epsilon: 0.9822
Ep:   70/500 | Reward:  -1081.71 | Avg R (10): -1091.66 | Kp/Kd: 2.00/5.00 | Epsilon: 0.9792
Ep:   80/500 | Reward:  -1130.71 | Avg R (10): -1101.85 | Kp/Kd: 4.00/7.50 | Epsilon: 0.9763
Ep:   90/500 | Reward:  -4561.17 | Avg R (10): -2611.73 | Kp/Kd: 16.00/7.50 | Epsilon: 0.9734
Training failed with error: [Errno 2] Unable to synchronously create file (unable to open file: name = '..\models\dqn_pid_tuner.weights.h5', errno = 2, error mess

Analysis and Visualization

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Location of the per-episode CSV log read by the analysis cells.
log_path = 'results/training_log.csv'

# Load the log into `df` when it exists; otherwise report the problem.
# `df` stays undefined in that case, so later cells that use it will not run.
if not os.path.exists(log_path):
    print(f"Error: Log file not found at {log_path}. Please check training execution.")
else:
    df = pd.read_csv(log_path)
    print("Training log loaded successfully.")

In [None]:
# --- Reward per episode ------------------------------------------------------
reward_fig, reward_ax = plt.subplots(figsize=(12, 5))
# A 10-episode rolling mean is stored on the frame and drawn over the raw
# per-episode reward so the trend is easier to read.
df['rolling_reward'] = df['total_reward'].rolling(window=10).mean()
reward_ax.plot(df['episode'], df['total_reward'], alpha=0.3, label='Episode Reward')
reward_ax.plot(
    df['episode'],
    df['rolling_reward'],
    color='red',
    linewidth=2,
    label='Rolling Mean (10 Episodes)',
)
reward_ax.set_title('DQN Performance: Total Reward per Episode')
reward_ax.set_xlabel('Episode')
reward_ax.set_ylabel('Total Reward (Negative ISE)')
reward_ax.grid(True)
reward_ax.legend()
plt.show()

# --- PID gains logged at the end of each episode -----------------------------
gains_fig, gains_ax = plt.subplots(figsize=(12, 5))
for gain_column, gain_label, gain_color in (
    ('final_Kp', 'Final Kp (Proportional Gain)', 'blue'),
    ('final_Kd', 'Final Kd (Derivative Gain)', 'green'),
):
    gains_ax.plot(df['episode'], df[gain_column], label=gain_label, color=gain_color)
gains_ax.set_title('DQN Auto-Tuning: Trajectory of PID Gains')
gains_ax.set_xlabel('Episode')
gains_ax.set_ylabel('Gain Value')
gains_ax.grid(True)
gains_ax.legend()
plt.show()

# Report the gains recorded in the last row of the log.
final_Kp = df['final_Kp'].iloc[-1]
final_Kd = df['final_Kd'].iloc[-1]
print(f"\n--- Final Optimized Gains ---")
print(f"Learned Kp: {final_Kp:.4f}")
print(f"Learned Kd: {final_Kd:.4f}")


Testing: simulate one episode with the final learned Kp and Kd to observe the resulting altitude trajectory.

In [None]:
from src.quadcopter_env import QuadcopterPIDA

# One test episode is at most 100 RL steps. Action index 4 is used as the
# NO-OP action so the learned gains stay constant during the test.
TEST_RL_STEPS = 100
NO_OP_ACTION = 4

# The whole test is guarded by the training log: without it there are no
# learned gains to evaluate.
if os.path.exists(log_path):
    # Gains recorded in the last row of the training log.
    test_Kp = df['final_Kp'].iloc[-1]
    test_Kd = df['final_Kd'].iloc[-1]

    test_env = QuadcopterPIDA()
    # Only Kp and Kd were tuned, so Ki is passed through unchanged from the
    # controller's current value.
    # NOTE(review): the gains are set before reset(); confirm that reset()
    # does not restore default gains.
    test_env.pid_controller.set_gains(test_Kp, test_env.pid_controller.Ki, test_Kd)

    # Reset state
    state, _ = test_env.reset()

    print(f"Testing with Kp={test_Kp:.4f}, Kd={test_Kd:.4f}")

    # Each step's info dict carries the detailed altitude samples for that
    # step; they are concatenated into one trajectory for plotting.
    altitude_history = []
    for rl_step in range(TEST_RL_STEPS):
        state, reward, terminated, truncated, info = test_env.step(NO_OP_ACTION)
        altitude_history.extend(info['altitude_history'])
        if terminated or truncated:
            break

    # Plot the altitude trajectory against the controller's setpoint.
    plt.figure(figsize=(12, 5))
    plt.plot(altitude_history, label='Quadcopter Altitude (z)')
    plt.axhline(y=test_env.pid_controller.setpoint, color='r', linestyle='--', label='Setpoint (1.0m)')
    plt.title('Altitude Trajectory using Final Learned Gains')
    plt.xlabel(f'Internal Simulation Step (Total: {len(altitude_history)})')
    plt.ylabel('Altitude (m)')
    plt.ylim(0, 1.5)
    plt.legend()
    plt.grid(True)
    plt.show()
else:
    print("Cannot run test simulation because training log was not found.")