In [39]:
!python --version

Python 3.11.5


In [40]:
import gymnasium as gym
import random

In [41]:
env = gym.make('CartPole-v1',render_mode="rgb_array")
states = env.observation_space.shape[0]
actions = env.action_space.n

In [42]:
actions

np.int64(2)

In [43]:
states

4

In [44]:
import gymnasium as gym
import random

env = gym.make("CartPole-v1", render_mode="human")  # ✅ render mode set
episodes = 10

for episode in range(1, episodes + 1):
    state, info = env.reset()  # ✅ unpack both values
    score = 0
    done = False

    while not done:
        action = random.choice([0, 1])
        next_state, reward, terminated, truncated, info = env.step(action)
        score += reward
        done = terminated or truncated  # ✅ update done properly

    print(f"Episode: {episode}  Score: {score}")

env.close()


Episode: 1  Score: 13.0
Episode: 2  Score: 15.0
Episode: 3  Score: 55.0
Episode: 4  Score: 43.0
Episode: 5  Score: 17.0
Episode: 6  Score: 10.0
Episode: 7  Score: 10.0
Episode: 8  Score: 32.0
Episode: 9  Score: 13.0
Episode: 10  Score: 35.0


In [2]:
import os
from stable_baselines3.common.vec_env import DummyVecEnv
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
import imageio

# ---------------- CONFIGURATION ----------------
env_id = "CartPole-v1"
total_timesteps = 50_000
n_eval_episodes = 100
seeds = [0, 7, 21, 42, 84]  # 🌱 seeds to train

base_model_dir = "models/lr_1e-3_50k"
base_video_dir = "videos/lr_1e-3_50k"
result_file = "results_lr_1e-3_50k.txt"

# Create base directories
os.makedirs(base_model_dir, exist_ok=True)
os.makedirs(base_video_dir, exist_ok=True)

# Create or overwrite results file
with open(result_file, "w") as f:
    f.write("Seed\tMeanReward\tStdReward\n")

# ---------------- LOOP THROUGH SEEDS ----------------
for seed in seeds:
    print(f"\n🚀 Training DQN model with SEED = {seed} ...")

    # 1️⃣ Create environment
    env = DummyVecEnv([lambda: gym.make(env_id, render_mode=None, disable_env_checker=True)])
    env.seed(seed)

    # 2️⃣ Create model (params unchanged)
    model = DQN(
        "MlpPolicy",
        env,
        learning_rate=1e-3,
        buffer_size=50_000,
        exploration_final_eps=0.01,
        target_update_interval=100,
        verbose=1,
        tensorboard_log="./logs/lr_1e-3_50k",
        seed=seed
    )

    # 3️⃣ Train the model
    model.learn(total_timesteps=total_timesteps)

    # 4️⃣ Evaluate trained model
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes)
    print(f"✅ Seed {seed}: Mean reward = {mean_reward:.2f} ± {std_reward:.2f}")

    # Append result to file
    with open(result_file, "a") as f:
        f.write(f"{seed}\t{mean_reward:.2f}\t{std_reward:.2f}\n")

    # 5️⃣ Save model (structured)
    model_folder = os.path.join(base_model_dir, f"seed_{seed}")
    os.makedirs(model_folder, exist_ok=True)
    model_path = os.path.join(model_folder, f"dqn_cartpole_seed_{seed}")
    model.save(model_path)
    print(f"💾 Model saved at: {model_path}.zip")

    # 6️⃣ Record one full episode video
    seed_video_folder = os.path.join(base_video_dir, f"seed_val_{seed}")
    os.makedirs(seed_video_folder, exist_ok=True)

    env_vis = gym.make(env_id, render_mode="rgb_array")
    model = DQN.load(model_path, env=env_vis)

    frames = []
    obs, info = env_vis.reset(seed=seed)
    done, truncated = False, False
    total_reward = 0

    while not (done or truncated):
        frame = env_vis.render()
        frames.append(frame)
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env_vis.step(action)
        total_reward += reward

    env_vis.close()

    # 7️⃣ Save episode video
    output_video = os.path.join(seed_video_folder, f"dqn_cartpole_seed_{seed}.mp4")
    imageio.mimsave(output_video, frames, fps=30)

    print(f"🎥 Video saved at: {output_video}")
    print(f"🏆 Episode reward: {total_reward:.1f}")

print("\n✅ All seeds completed successfully!")
print(f"📄 Results saved to: {result_file}")



🚀 Training DQN model with SEED = 0 ...
Using cpu device
Logging to ./logs/lr_1e-3_50k\DQN_1
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.988    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 4216     |
|    time_elapsed     | 0        |
|    total_timesteps  | 59       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.973    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1800     |
|    time_elapsed     | 0        |
|    total_timesteps  | 135      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.419    |
|    n_updates        | 8        |
----------------------------------


----------------------------------
| rollout/            |          |
|    exploration_rate | 0.945    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 866      |
|    time_elapsed     | 0        |
|    total_timesteps  | 278      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0763   |
|    n_updates        | 44       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.932    |
| time/               |          |
|    episodes         | 16       |
|    fps              | 783      |
|    time_elapsed     | 0        |
|    total_timesteps  | 341      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.224    |
|    n_updates        | 60       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat



✅ Seed 0: Mean reward = 9.33 ± 0.75
💾 Model saved at: models/lr_1e-3_50k\seed_0\dqn_cartpole_seed_0.zip
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




🎥 Video saved at: videos/lr_1e-3_50k\seed_val_0\dqn_cartpole_seed_0.mp4
🏆 Episode reward: 11.0

🚀 Training DQN model with SEED = 7 ...
Using cpu device
Logging to ./logs/lr_1e-3_50k\DQN_2
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.983    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 10995    |
|    time_elapsed     | 0        |
|    total_timesteps  | 85       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.968    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 3152     |
|    time_elapsed     | 0        |
|    total_timesteps  | 163      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.276    |
|    n_updates        | 15       |
----------------------------------
----------------------------------
| rollo



🎥 Video saved at: videos/lr_1e-3_50k\seed_val_7\dqn_cartpole_seed_7.mp4
🏆 Episode reward: 9.0

🚀 Training DQN model with SEED = 21 ...
Using cpu device
Logging to ./logs/lr_1e-3_50k\DQN_3
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.981    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 3879     |
|    time_elapsed     | 0        |
|    total_timesteps  | 97       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.966    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1504     |
|    time_elapsed     | 0        |
|    total_timesteps  | 170      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.277    |
|    n_updates        | 17       |
----------------------------------
----------------------------------
| rollo



🎥 Video saved at: videos/lr_1e-3_50k\seed_val_21\dqn_cartpole_seed_21.mp4
🏆 Episode reward: 10.0

🚀 Training DQN model with SEED = 42 ...
Using cpu device
Logging to ./logs/lr_1e-3_50k\DQN_4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.982    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 11161    |
|    time_elapsed     | 0        |
|    total_timesteps  | 92       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.962    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 2885     |
|    time_elapsed     | 0        |
|    total_timesteps  | 194      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.216    |
|    n_updates        | 23       |
----------------------------------
----------------------------------
| ro



🎥 Video saved at: videos/lr_1e-3_50k\seed_val_42\dqn_cartpole_seed_42.mp4
🏆 Episode reward: 25.0

🚀 Training DQN model with SEED = 84 ...
Using cpu device
Logging to ./logs/lr_1e-3_50k\DQN_5
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.986    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 8552     |
|    time_elapsed     | 0        |
|    total_timesteps  | 71       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.971    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 3560     |
|    time_elapsed     | 0        |
|    total_timesteps  | 147      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.332    |
|    n_updates        | 11       |
----------------------------------
----------------------------------
| ro



🎥 Video saved at: videos/lr_1e-3_50k\seed_val_84\dqn_cartpole_seed_84.mp4
🏆 Episode reward: 10.0

✅ All seeds completed successfully!
📄 Results saved to: results_lr_1e-3_50k.txt


In [None]:
import os
from stable_baselines3.common.vec_env import DummyVecEnv
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
import imageio

# ---------------- CONFIGURATION ----------------
env_id = "CartPole-v1"
total_timesteps = 200_000
n_eval_episodes = 100
seeds = [0, 7, 21, 42, 84]

# Base folders and results file
base_model_dir = "models/ls_default"
base_video_dir = "videos/ls_default"
result_file = "results_ls_default.txt"

# Create folders if missing
os.makedirs(base_model_dir, exist_ok=True)
os.makedirs(base_video_dir, exist_ok=True)

# Create or overwrite results file
with open(result_file, "w") as f:
    f.write("Seed\tMeanReward\tStdReward\n")

# ---------------- LOOP THROUGH SEEDS ----------------
for seed in seeds:
    print(f"\n🚀 Training DQN model with SEED = {seed} ...")

    # 1️⃣ Create and seed environment
    env = DummyVecEnv([lambda: gym.make(env_id, render_mode=None, disable_env_checker=True)])
    for i in range(env.num_envs):
        env.envs[i].reset(seed=seed + i)

    # 2️⃣ Define model (unchanged hyperparameters)
    model = DQN(
        "MlpPolicy",
        env,
        learning_rate=5e-4,
        buffer_size=100_000,
        exploration_final_eps=0.02,
        exploration_fraction=0.3,
        target_update_interval=500,
        verbose=1,
        tensorboard_log="./logs/ls_default",
        seed=seed
    )

    # 3️⃣ Train the model
    model.learn(total_timesteps=total_timesteps)

    # 4️⃣ Evaluate trained model
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes)
    print(f"✅ Seed {seed}: Mean reward = {mean_reward:.2f} ± {std_reward:.2f}")

    # Append result to file
    with open(result_file, "a") as f:
        f.write(f"{seed}\t{mean_reward:.2f}\t{std_reward:.2f}\n")



    # 5️⃣ Save trained model
    model_folder = os.path.join(base_model_dir, f"seed_{seed}")
    os.makedirs(model_folder, exist_ok=True)
    model_path = os.path.join(model_folder, f"dqn_cartpole_seed_{seed}")
    model.save(model_path)
    print(f"💾 Model saved at: {model_path}.zip")

    # 6️⃣ Create visualization environment
    seed_video_folder = os.path.join(base_video_dir, f"seed_val_{seed}")
    os.makedirs(seed_video_folder, exist_ok=True)

    env_vis = gym.make(env_id, render_mode="rgb_array")
    model = DQN.load(model_path, env=env_vis)

    frames = []
    obs, info = env_vis.reset(seed=seed)
    done, truncated = False, False
    total_reward = 0

    # 7️⃣ Record one full episode
    while not (done or truncated):
        frame = env_vis.render()
        frames.append(frame)
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env_vis.step(action)
        total_reward += reward

    env_vis.close()

    # 8️⃣ Save recorded video
    output_video = os.path.join(seed_video_folder, f"dqn_cartpole_seed_{seed}.mp4")
    imageio.mimsave(output_video, frames, fps=30)
    print(f"🎥 Video saved at: {output_video}")
    print(f"🏆 Episode reward: {total_reward:.1f}")

print("\n✅ All seeds completed successfully!")
print(f"📄 Results saved to: {result_file}")



🚀 Training DQN model with SEED = 0 ...
Using cpu device
Logging to ./logs/ls_default\DQN_1
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 6558     |
|    time_elapsed     | 0        |
|    total_timesteps  | 59       |
----------------------------------


----------------------------------
| rollout/            |          |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 3067     |
|    time_elapsed     | 0        |
|    total_timesteps  | 135      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.497    |
|    n_updates        | 8        |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.996    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 2008     |
|    time_elapsed     | 0        |
|    total_timesteps  | 249      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.241    |
|    n_updates        | 37       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat



🎥 Video saved at: videos/ls_default\seed_val_0\dqn_cartpole_seed_0.mp4
🏆 Episode reward: 162.0

🚀 Training DQN model with SEED = 7 ...
Using cpu device
Logging to ./logs/ls_default\DQN_2
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 9445     |
|    time_elapsed     | 0        |
|    total_timesteps  | 85       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 2411     |
|    time_elapsed     | 0        |
|    total_timesteps  | 164      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.399    |
|    n_updates        | 15       |
----------------------------------
----------------------------------
| rollou



🎥 Video saved at: videos/ls_default\seed_val_7\dqn_cartpole_seed_7.mp4
🏆 Episode reward: 102.0

🚀 Training DQN model with SEED = 21 ...
Using cpu device
Logging to ./logs/ls_default\DQN_3
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 10781    |
|    time_elapsed     | 0        |
|    total_timesteps  | 97       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 2575     |
|    time_elapsed     | 0        |
|    total_timesteps  | 170      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.401    |
|    n_updates        | 17       |
----------------------------------
----------------------------------
| rollo



🎥 Video saved at: videos/ls_default\seed_val_21\dqn_cartpole_seed_21.mp4
🏆 Episode reward: 134.0

🚀 Training DQN model with SEED = 42 ...
Using cpu device
Logging to ./logs/ls_default\DQN_4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 11507    |
|    time_elapsed     | 0        |
|    total_timesteps  | 92       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 2211     |
|    time_elapsed     | 0        |
|    total_timesteps  | 199      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.361    |
|    n_updates        | 24       |
----------------------------------
----------------------------------
| rol



🎥 Video saved at: videos/ls_default\seed_val_42\dqn_cartpole_seed_42.mp4
🏆 Episode reward: 13.0

🚀 Training DQN model with SEED = 84 ...
Using cpu device
Logging to ./logs/ls_default\DQN_5
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 8857     |
|    time_elapsed     | 0        |
|    total_timesteps  | 71       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 2742     |
|    time_elapsed     | 0        |
|    total_timesteps  | 159      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.374    |
|    n_updates        | 14       |
----------------------------------
----------------------------------
| roll



🎥 Video saved at: videos/ls_default\seed_val_84\dqn_cartpole_seed_84.mp4
🏆 Episode reward: 282.0

✅ All seeds completed successfully!
📄 Results saved to: results_ls_default.txt


In [None]:
import os
import gymnasium as gym
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
import imageio

# ---------------- CONFIGURATION ----------------
env_id = "CartPole-v1"
total_timesteps = 200_000
n_eval_episodes = 100
base_model_dir = "models/ls_10k"
base_video_dir = "videos/ls_10k"
result_file = "results_ls_10k.txt"

seeds = [0, 7, 21, 42, 84]

# Create directories
os.makedirs(base_model_dir, exist_ok=True)
os.makedirs(base_video_dir, exist_ok=True)

# Create or overwrite result file
with open(result_file, "w") as f:
    f.write("Seed\tMeanReward\tStdReward\n")

# ---------------- LOOP THROUGH SEEDS ----------------
for seed in seeds:
    print(f"\n🚀 Training with SEED = {seed} ...")

    # 1️⃣ Create environment and apply seed
    env = DummyVecEnv([lambda: gym.make(env_id, render_mode=None, disable_env_checker=True)])
    env.seed(seed)

    # 2️⃣ Initialize model
    model = DQN(
        "MlpPolicy",
        env,
        learning_rate=5e-4,
        buffer_size=100_000,
        exploration_final_eps=0.02,
        exploration_fraction=0.3,
        target_update_interval=500,
        learning_starts=10_000,
        verbose=1,
        tensorboard_log="./logs/ls_10k",
        seed=seed
    )

    # 3️⃣ Train model
    model.learn(total_timesteps=total_timesteps)

    # 4️⃣ Evaluate trained model
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes)
    print(f"✅ Seed {seed}: Mean reward = {mean_reward:.2f} ± {std_reward:.2f}")

    # Append result to file
    with open(result_file, "a") as f:
        f.write(f"{seed}\t{mean_reward:.2f}\t{std_reward:.2f}\n")

    # 5️⃣ Save model to structured folder
    model_folder = os.path.join(base_model_dir, f"seed_{seed}")
    os.makedirs(model_folder, exist_ok=True)
    model_path = os.path.join(model_folder, f"dqn_cartpole_seed_{seed}")
    model.save(model_path)
    print(f"💾 Model saved at: {model_path}.zip")

    # 6️⃣ Record one full episode video
    seed_video_folder = os.path.join(base_video_dir, f"seed_val_{seed}")
    os.makedirs(seed_video_folder, exist_ok=True)

    env_vis = gym.make(env_id, render_mode="rgb_array")
    model = DQN.load(model_path, env=env_vis)

    frames = []
    obs, info = env_vis.reset(seed=seed)
    done, truncated = False, False
    total_reward = 0

    # 7️⃣ Run the agent and collect all frames until episode ends
    while not (done or truncated):
        frame = env_vis.render()
        frames.append(frame)
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env_vis.step(action)
        total_reward += reward

    env_vis.close()

    # 8️⃣ Save recorded video
    output_video = os.path.join(seed_video_folder, f"dqn_cartpole_seed_{seed}.mp4")
    imageio.mimsave(output_video, frames, fps=30)
    print(f"🎥 Video saved: {output_video} | Episode reward: {total_reward:.1f}")

print("\n✅ All seeds completed successfully!")
print(f"📄 Results saved to: {result_file}")



🚀 Training with SEED = 0 ...
Using cpu device
Logging to ./logs/ls_10k\DQN_1
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 4330     |
|    time_elapsed     | 0        |
|    total_timesteps  | 59       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 4824     |
|    time_elapsed     | 0        |
|    total_timesteps  | 135      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.996    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 4612     |
|    time_elapsed     | 0        |
|    total_timesteps  | 249      |
------------

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.99     |
| time/               |          |
|    episodes         | 28       |
|    fps              | 4812     |
|    time_elapsed     | 0        |
|    total_timesteps  | 609      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.989    |
| time/               |          |
|    episodes         | 32       |
|    fps              | 4909     |
|    time_elapsed     | 0        |
|    total_timesteps  | 695      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.987    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 4813     |
|    time_elapsed     | 0        |
|    total_timesteps  | 768      |
----------------------------------
----------------------------------
| rollout/          

In [5]:
import os
import gymnasium as gym
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
import imageio

# ---------------- CONFIGURATION ----------------
env_id = "CartPole-v1"
total_timesteps = 200_000
n_eval_episodes = 100
base_model_dir = "models/ls_2k"
base_video_dir = "videos/ls_2k"
result_file = "results_ls_2k.txt"

seeds = [0, 7, 21, 42, 84]

# Create directories
os.makedirs(base_model_dir, exist_ok=True)
os.makedirs(base_video_dir, exist_ok=True)

# Create or overwrite result file
with open(result_file, "w") as f:
    f.write("Seed\tMeanReward\tStdReward\n")

# ---------------- LOOP THROUGH SEEDS ----------------
for seed in seeds:
    print(f"\n🚀 Training with SEED = {seed} ...")

    # 1️⃣ Create environment
    env = DummyVecEnv([lambda: gym.make(env_id)])
    env.seed(seed)

    # 2️⃣ Initialize model
    model = DQN(
        "MlpPolicy",
        env,
        learning_rate=5e-4,
        buffer_size=100_000,
        exploration_final_eps=0.02,
        exploration_fraction=0.3,
        target_update_interval=500,
        learning_starts=2_000,
        verbose=0,
        tensorboard_log="./logs/",
        seed=seed
    )

    # 3️⃣ Train model
    model.learn(total_timesteps=total_timesteps)

    # 4️⃣ Evaluate trained model
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes)
    print(f"✅ Seed {seed}: Mean reward = {mean_reward:.2f} ± {std_reward:.2f}")

    # Append result to file
    with open(result_file, "a") as f:
        f.write(f"{seed}\t{mean_reward:.2f}\t{std_reward:.2f}\n")

    # 5️⃣ Save model in structured directory
    model_folder = os.path.join(base_model_dir, f"seed_{seed}")
    os.makedirs(model_folder, exist_ok=True)
    model_path = os.path.join(model_folder, f"dqn_cartpole_seed_{seed}")
    model.save(model_path)
    print(f"💾 Model saved at: {model_path}.zip")

    # 6️⃣ Record one full episode video
    seed_video_folder = os.path.join(base_video_dir, f"seed_val_{seed}")
    os.makedirs(seed_video_folder, exist_ok=True)

    env_vis = gym.make(env_id, render_mode="rgb_array")
    model = DQN.load(model_path, env=env_vis)

    frames = []
    obs, info = env_vis.reset(seed=seed)
    done, truncated = False, False
    total_reward = 0

    while not (done or truncated):
        frame = env_vis.render()
        frames.append(frame)
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env_vis.step(action)
        total_reward += reward

    env_vis.close()

    # Save video
    output_video = os.path.join(seed_video_folder, f"dqn_cartpole_seed_{seed}.mp4")
    imageio.mimsave(output_video, frames, fps=30)
    print(f"🎥 Video saved at: {output_video} | Episode reward: {total_reward:.1f}")

print("\n✅ All seeds completed successfully!")
print(f"📄 Results saved to: {result_file}")



🚀 Training with SEED = 0 ...




✅ Seed 0: Mean reward = 186.81 ± 5.35
💾 Model saved at: models/ls_2k\seed_0\dqn_cartpole_seed_0.zip




🎥 Video saved at: videos/ls_2k\seed_val_0\dqn_cartpole_seed_0.mp4 | Episode reward: 195.0

🚀 Training with SEED = 7 ...
✅ Seed 7: Mean reward = 44.91 ± 10.40
💾 Model saved at: models/ls_2k\seed_7\dqn_cartpole_seed_7.zip




🎥 Video saved at: videos/ls_2k\seed_val_7\dqn_cartpole_seed_7.mp4 | Episode reward: 44.0

🚀 Training with SEED = 21 ...
✅ Seed 21: Mean reward = 14.06 ± 3.07
💾 Model saved at: models/ls_2k\seed_21\dqn_cartpole_seed_21.zip




🎥 Video saved at: videos/ls_2k\seed_val_21\dqn_cartpole_seed_21.mp4 | Episode reward: 12.0

🚀 Training with SEED = 42 ...
✅ Seed 42: Mean reward = 77.56 ± 27.61
💾 Model saved at: models/ls_2k\seed_42\dqn_cartpole_seed_42.zip




🎥 Video saved at: videos/ls_2k\seed_val_42\dqn_cartpole_seed_42.mp4 | Episode reward: 13.0

🚀 Training with SEED = 84 ...
✅ Seed 84: Mean reward = 49.84 ± 19.60
💾 Model saved at: models/ls_2k\seed_84\dqn_cartpole_seed_84.zip




🎥 Video saved at: videos/ls_2k\seed_val_84\dqn_cartpole_seed_84.mp4 | Episode reward: 94.0

✅ All seeds completed successfully!
📄 Results saved to: results_ls_2k.txt
