# CartPole-v1

In [1]:
# 1) Imports
from src.utils.config import load_configs, make_env, make_agent
from src.train import ExperimentRunner, ExperimentResult
from src.agents.dqn import DQNAgent
import matplotlib.pyplot as plt
from pathlib import Path
import gymnasium as gym
import numpy as np
import torch
from src.evaluate import watch_agent
import yaml

In [2]:
import sys
from pathlib import Path

# Make the project importable from this notebook (adjust if the notebook lives
# in another folder).
# The notebook imports modules as `src.<module>`, which requires the directory
# that *contains* `src` (the project root) on sys.path. `<root>/src` is still
# added as before so any flat imports that relied on it keep working.
# NOTE(review): on a fresh kernel this cell has to run before the `from src...`
# imports in the first cell, unless the project is installed in the environment.
project_root = Path().resolve().parent
for import_dir in (project_root, project_root / "src"):
    if str(import_dir) not in sys.path:  # stay idempotent when the cell is re-run
        sys.path.append(str(import_dir))

In [3]:
# 2) Create the environment
#    – render_mode='rgb_array' makes env.render() return the frame as an image
#      array (the rollout below prints frame shapes of (400, 600, 3)), so frames
#      can be collected for plotting or further processing later.
env = gym.make("CartPole-v1", render_mode="rgb_array")

#### Action and State Space

In [4]:
# 3) Inspect spaces
# Print the full space descriptions first, then the observation shape.
# NOTE: the "Action shape" entry prints the action space object itself
# (Discrete(2)), not a shape tuple.
for label, space_info in (
    ("Observation space:", env.observation_space),  # Box(4,)
    ("Action space:", env.action_space),            # Discrete(2)
    ('Space shape: ', env.observation_space.shape),
    ('Action shape: ', env.action_space),
):
    print(label, space_info)

Observation space: Box([-4.8               -inf -0.41887903        -inf], [4.8               inf 0.41887903        inf], (4,), float32)
Action space: Discrete(2)
Space shape:  (4,)
Action shape:  Discrete(2)


#### Running an Episode of CartPole-v1

In [5]:
# 4) Reset and take a few random steps
#    reset(seed=42) seeds the environment only; the actions come from
#    env.action_space.sample(), which is not seeded here, so the exact
#    trajectory can differ between runs.
obs, info = env.reset(seed=42)
frames = []  # one rendered RGB frame per step
for step in range(10):
    action = env.action_space.sample()              # random action: 0 or 1
    # step() reports natural termination (`done`) separately from time-limit
    # truncation (`truncated`); either one ends the episode.
    obs, reward, done, truncated, info = env.step(action)
    frame = env.render()                            # render once and reuse the result
    print(frame.shape)
    frames.append(frame)
    print(f"Step {step:02d} – obs={obs.round(2)}, reward={reward}, done={done}")
    if done or truncated:
        # Start a fresh episode so the next step() is called on a valid state.
        obs, info = env.reset()

  from pkg_resources import resource_stream, resource_exists


(400, 600, 3)
Step 00 – obs=[ 0.03 -0.2   0.04  0.32], reward=1.0, done=False
(400, 600, 3)
Step 01 – obs=[ 0.02 -0.4   0.04  0.63], reward=1.0, done=False
(400, 600, 3)
Step 02 – obs=[ 0.02 -0.2   0.06  0.35], reward=1.0, done=False
(400, 600, 3)
Step 03 – obs=[ 0.01 -0.01  0.06  0.07], reward=1.0, done=False
(400, 600, 3)
Step 04 – obs=[ 0.01 -0.2   0.06  0.39], reward=1.0, done=False
(400, 600, 3)
Step 05 – obs=[ 0.01 -0.4   0.07  0.7 ], reward=1.0, done=False
(400, 600, 3)
Step 06 – obs=[-0.   -0.21  0.09  0.43], reward=1.0, done=False
(400, 600, 3)
Step 07 – obs=[-0.01 -0.01  0.09  0.16], reward=1.0, done=False
(400, 600, 3)
Step 08 – obs=[-0.01 -0.21  0.1   0.48], reward=1.0, done=False
(400, 600, 3)
Step 09 – obs=[-0.01 -0.02  0.11  0.22], reward=1.0, done=False


In [6]:
# 5) Close the environment now that the random rollout is finished;
#    the frames collected above remain available in `frames`.
env.close()

In [7]:
# Stack the collected frames into one array: (num_frames, height, width, 3).
frames_np = np.stack(frames)

# Wrap the array as a tensor and move the colour channel to the second axis,
# giving the PyTorch image layout (N, C, H, W): (num_frames, 3, height, width).
frames_tensor = torch.from_numpy(frames_np).permute(0, 3, 1, 2)

#### Training the Model with the Experiment Runner

In [8]:
# Load the experiment configuration from ../configs/ and hand it to the
# project's ExperimentRunner, keeping whatever run() returns in `all_results`.
# NOTE: long-running cell — the recorded run shows a 15000-iteration progress
# bar taking about 29 minutes and the final model being saved to
# results/dqn_cartpole/final_model.pth.
# NOTE(review): presumably load_configs merges the YAML files found in the
# directory — confirm in src.utils.config.
config_dir = Path("../configs/")
cfg = load_configs(config_dir)
runner = ExperimentRunner(cfg)
all_results = runner.run()

CartPole-v1 (Seed 100): 100%|██████████| 15000/15000 [28:58<00:00,  8.63it/s]   

Saving model state to results/dqn_cartpole/final_model.pth...





In [13]:

# Read the evaluation settings from YAML, then watch the agent play using them
# together with the agent section of the training config.
eval_config_path = Path("../configs/evaluate_config.yaml")
with eval_config_path.open('r') as config_file:
    eval_config = yaml.safe_load(config_file)

watch_agent(eval_config, cfg['agent'])

Watching agent from results/dqn_cartpole/final_model.pth
Episode 1: Total Reward  =79.0
Episode 2: Total Reward  =14.0
Episode 3: Total Reward  =45.0
Episode 4: Total Reward  =15.0
Episode 5: Total Reward  =12.0


In [12]:
# Show the agent hyperparameters from the loaded config (the same `cfg` that
# was passed to ExperimentRunner and watch_agent).
print(cfg['agent']['hyperparams'])

{'lr': 0.001, 'gamma': 0.99, 'batch_size': 16, 'replay_buffer_size': 10000, 'min_replay_buffer_size': 1000, 'target_update_freq': 200, 'epsilon_start': 1.0, 'epsilon_end': 0.05, 'epsilon_decay_steps': 20000}
