# Missile PID Tuning with Reinforcement Learning - GPU Training

Bu notebook, 2D savaş ortamında manevra yapan hedeflere karşı füze otopilotu PID parametrelerinin pekiştirmeli öğrenme ile adaptif ayarlanması için Kaggle GPU ortamında eğitim yapmak üzere tasarlanmıştır.

## Kurulum

1. Bu notebook'u Kaggle'a yükleyin
2. Settings > Accelerator > GPU'yu etkinleştirin
3. Internet'i etkinleştirin (Settings > Internet)
4. Çalıştırın!

## 1. Repoyu Klonlama ve Gerekli Paketleri Yükleme

In [None]:
# Clone the repository.
# NOTE(review): YOUR_USERNAME in the URL looks like a placeholder - replace it with the real GitHub account before running.
!git clone https://github.com/YOUR_USERNAME/2D-missile-PID-tuning-with-RL.git
# Change into the cloned checkout; later cells use relative paths (requirements.txt) and repo-local imports.
%cd 2D-missile-PID-tuning-with-RL

In [None]:
# Install the required packages listed in requirements.txt (-q asks pip for quieter output)
!pip install -q -r requirements.txt

In [None]:
# GPU check: report whether CUDA is usable and, if so, which device(s) are visible
import torch

cuda_ready = torch.cuda.is_available()
print(f"CUDA Available: {cuda_ready}")
if cuda_ready:
    # Device details are only queried when CUDA is actually available
    print(f"Device Name: {torch.cuda.get_device_name(0)}")
    print(f"Device Count: {torch.cuda.device_count()}")

## 2. Konfigürasyon

In [None]:
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3 import PPO, SAC, TD3
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines3.common.monitor import Monitor

# Import environment
from envs.missile_pid_env import MissilePIDEnv
from config import get_default_config

In [None]:
# Training parameters
# All later cells read their settings from this single dictionary.
CONFIG = {
    # Environment
    'max_steps': 500,
    'dt': 0.1,
    'map_size': 10000.0,
    'hit_radius': 50.0,
    'target_maneuver': 'circular',  # 'straight', 'circular', 'zigzag', 'evasive'

    # Training
    'algorithm': 'PPO',  # 'PPO', 'SAC', 'TD3'
    'total_timesteps': 2_000_000,  # faster with a GPU on Kaggle
    'learning_rate': 3e-4,
    'batch_size': 128,
    'n_steps': 2048,
    'gamma': 0.99,

    # Model
    'hidden_size': 256,
    'n_layers': 2,

    # Other
    'seed': 42,
    # Fall back to CPU when no accelerator is attached, so the notebook still
    # runs instead of failing at model construction.
    # (torch is imported by the GPU check cell earlier in the notebook.)
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'n_envs': 8,  # number of parallel environments
    'save_freq': 50000,
    'eval_freq': 10000,
}

print("Configuration:")
for key, value in CONFIG.items():
    print(f"  {key}: {value}")

if CONFIG['device'] != 'cuda':
    print("WARNING: CUDA is not available - training will run on CPU.")

## 3. Environment Test

In [None]:
# Smoke test: build one environment straight from CONFIG and poke at it
_env_fields = ('max_steps', 'dt', 'map_size', 'hit_radius', 'target_maneuver')
env = MissilePIDEnv(**{field: CONFIG[field] for field in _env_fields})

obs, info = env.reset()
print(f"Observation shape: {obs.shape}")
print(f"Action space: {env.action_space}")
print(f"Observation space: {env.observation_space}")

# Drive up to five random actions, stopping early if the episode ends
for _ in range(5):
    step_result = env.step(env.action_space.sample())
    obs, reward, terminated, truncated, info = step_result
    print(f"Step: reward={reward:.2f}, terminated={terminated}, truncated={truncated}")
    episode_over = terminated or truncated
    if episode_over:
        break

print("\nEnvironment test successful!")

## 4. Training

In [None]:
# Create the output directories, one per (algorithm, maneuver) combination
run_tag = f"{CONFIG['algorithm']}_{CONFIG['target_maneuver']}"
log_dir = f"./logs/{run_tag}"
save_dir = f"./models/{run_tag}"

# exist_ok=True makes re-running this cell harmless
for directory in (log_dir, save_dir):
    os.makedirs(directory, exist_ok=True)

print(f"Log directory: {log_dir}")
print(f"Save directory: {save_dir}")

In [None]:
# Constructor arguments shared by the training, evaluation and test environments
env_kwargs = {
    name: CONFIG[name]
    for name in ('max_steps', 'dt', 'map_size', 'hit_radius', 'target_maneuver')
}

# Vectorized training environment: the class is instantiated once per
# parallel worker with the keyword arguments above
train_env = make_vec_env(
    MissilePIDEnv,
    n_envs=CONFIG['n_envs'],
    seed=CONFIG['seed'],
    env_kwargs=env_kwargs,
)

# Single monitored environment used by the evaluation callback
eval_env = Monitor(MissilePIDEnv(**env_kwargs))

print(f"Created {CONFIG['n_envs']} parallel training environments")

In [None]:
# Build the model
# Network layout: n_layers hidden layers of hidden_size units each.
policy_kwargs = dict(
    net_arch=[CONFIG['hidden_size']] * CONFIG['n_layers']
)

# Map the configured algorithm name to its Stable-Baselines3 class.
ALGORITHMS = {"PPO": PPO, "SAC": SAC, "TD3": TD3}

algorithm = CONFIG['algorithm']
if algorithm not in ALGORITHMS:
    # Fail here with a clear message instead of a NameError on `model` below.
    raise ValueError(
        f"Unsupported algorithm {algorithm!r}; expected one of {sorted(ALGORITHMS)}"
    )

# Keyword arguments common to all three algorithms.
model_kwargs = dict(
    learning_rate=CONFIG['learning_rate'],
    batch_size=CONFIG['batch_size'],
    gamma=CONFIG['gamma'],
    policy_kwargs=policy_kwargs,
    verbose=1,
    tensorboard_log=log_dir,
    device=CONFIG['device'],
    seed=CONFIG['seed'],
)
if algorithm == "PPO":
    # n_steps (rollout length per environment) is only passed to PPO.
    model_kwargs['n_steps'] = CONFIG['n_steps']

model = ALGORITHMS[algorithm]("MlpPolicy", train_env, **model_kwargs)

print(f"Created {CONFIG['algorithm']} model on {CONFIG['device']}")
print(f"Total parameters: {sum(p.numel() for p in model.policy.parameters()):,}")

In [None]:
# Callbacks
# Stable-Baselines3 measures save_freq / eval_freq in callback calls, and each
# call corresponds to one step of the vectorized env, i.e. n_envs timesteps.
# Convert the timestep-based values in CONFIG into calls so checkpoints and
# evaluations happen at the intended timestep intervals.
n_parallel_envs = max(CONFIG['n_envs'], 1)
save_freq_calls = max(CONFIG['save_freq'] // n_parallel_envs, 1)
eval_freq_calls = max(CONFIG['eval_freq'] // n_parallel_envs, 1)

# Periodically snapshot the model into save_dir.
checkpoint_callback = CheckpointCallback(
    save_freq=save_freq_calls,
    save_path=save_dir,
    name_prefix="model",
    save_replay_buffer=True,
    save_vecnormalize=True,
)

# Periodically evaluate on eval_env and keep the best model in save_dir.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=save_dir,
    log_path=log_dir,
    eval_freq=eval_freq_calls,
    n_eval_episodes=10,
    deterministic=True,
    render=False
)

print("Callbacks configured")
print(
    f"  checkpoint every ~{save_freq_calls * n_parallel_envs:,} timesteps, "
    f"evaluation every ~{eval_freq_calls * n_parallel_envs:,} timesteps"
)

In [None]:
# START TRAINING!
banner = "=" * 60
print(banner)
print(f"Starting training: {CONFIG['total_timesteps']:,} timesteps")
print(banner)

# Checkpointing and periodic evaluation both run during learning
training_callbacks = [checkpoint_callback, eval_callback]
model.learn(
    total_timesteps=CONFIG['total_timesteps'],
    callback=training_callbacks,
    progress_bar=True,
)

print("\nTraining completed!")

In [None]:
# Save the final model alongside the checkpoints in save_dir
final_model_path = os.path.join(save_dir, "final_model")
model.save(final_model_path)
print("Final model saved to: " + final_model_path)

## 5. Evaluation

In [None]:
# Fresh environment for post-training evaluation
test_env = MissilePIDEnv(**env_kwargs)

n_test_episodes = 10
episode_rewards = []
hit_success = []

print(f"\nTesting model on {n_test_episodes} episodes...")

for episode in range(n_test_episodes):
    obs, info = test_env.reset()
    episode_reward = 0

    # Roll the deterministic policy forward until the episode ends
    while True:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = test_env.step(action)
        episode_reward += reward
        if terminated or truncated:
            break

    # The last info dict of the episode carries the 'hit' flag (missing => False)
    was_hit = info.get('hit', False)
    episode_rewards.append(episode_reward)
    hit_success.append(was_hit)

    hit_text = 'Yes' if was_hit else 'No'
    print(f"Episode {episode+1}: Reward={episode_reward:.2f}, Hit={hit_text}")

# Aggregate statistics over all test episodes
reward_mean = np.mean(episode_rewards)
reward_std = np.std(episode_rewards)
hit_rate = np.mean(hit_success) * 100
separator = "=" * 60

print("\n" + separator)
print(f"Average Reward: {reward_mean:.2f} ± {reward_std:.2f}")
print(f"Hit Success Rate: {hit_rate:.1f}%")
print(separator)

## 6. Visualization

In [None]:
# Roll out one deterministic episode so the environment records trajectories
obs, info = test_env.reset()
while True:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = test_env.step(action)
    if terminated or truncated:
        break

# Plot both trajectories on a single square figure
fig, ax = plt.subplots(figsize=(12, 12))

missile_traj = np.array(test_env.missile.trajectory)
target_traj = np.array(test_env.target.trajectory)

# Missile is drawn first, then the target, so the legend order stays
# Missile / Missile Start / Missile End / Target / Target Start / Target End
for path, colour, name in ((missile_traj, 'b', 'Missile'), (target_traj, 'r', 'Target')):
    xs, ys = path[:, 0], path[:, 1]
    ax.plot(xs, ys, f'{colour}-', linewidth=2, label=name, alpha=0.7)
    ax.plot(xs[0], ys[0], f'{colour}o', markersize=12, label=f'{name} Start')
    ax.plot(xs[-1], ys[-1], f'{colour}s', markersize=12, label=f'{name} End')

# Dashed circle showing the hit radius around the target's final position
final_target_xy = (target_traj[-1, 0], target_traj[-1, 1])
circle = plt.Circle(
    final_target_xy,
    test_env.hit_radius,
    color='r',
    fill=False,
    linestyle='--',
    linewidth=2,
    label='Hit Radius',
)
ax.add_patch(circle)

ax.set_xlabel('X Position (m)', fontsize=12)
ax.set_ylabel('Y Position (m)', fontsize=12)
ax.set_title(f'Missile vs {CONFIG["target_maneuver"].capitalize()} Target', fontsize=14, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)
ax.axis('equal')

plt.tight_layout()
plt.savefig('trajectory.png', dpi=150)
plt.show()

# Summarize the outcome from the last info dict of the episode
hit_text = 'Yes' if info.get('hit', False) else 'No'
final_distance = info.get('distance', 0)
print(f"Hit: {hit_text}")
print(f"Final distance: {final_distance:.2f}m")

## 7. Model'i İndirme

Eğitilmiş modeli ve logları Kaggle Output bölümünden indirebilirsiniz.

In [None]:
# Zip the model files and the logs.
# {save_dir} / {log_dir} are the Python variables set earlier, expanded by IPython into the shell command.
!zip -r trained_model.zip {save_dir}
!zip -r logs.zip {log_dir}

print("Model ve loglar ziplenmiştir. Output sekmesinden indirebilirsiniz.")