In [None]:
import os
from time import perf_counter

import matplotlib.pyplot as plt
import numpy as np
import wandb
from lightning.pytorch.loggers import WandbLogger
from pytorch_lightning import Trainer
from pytorch_lightning.strategies import DDPStrategy

from src.algorithms.generalized_policy_iteration import PolicyIterationTrain
from src.algorithms.unrolling_policy_iteration import UnrollingPolicyIterationTrain
from src.environments import CliffWalkingEnv

# wandb group under which every run created by this notebook is filed.
GROUP_NAME = "N_Unrolls"
# When True, the sweep cell writes its result arrays to disk as an .npz file.
SAVE = True
# Directory prefix (note the trailing slash) prepended to the saved/loaded file names.
PATH = "results/n_unrolls/"

## Auxiliary functions

In [None]:
def plot_errors(errs, N_unrolls, Exps, skip_idx=(), ylabel="Q Error"):
    """Plot one error curve per experiment against the number of unrolls.

    Args:
        errs: Indexable by experiment index; ``errs[i]`` holds the values of
            experiment ``i``, one per entry of ``N_unrolls``.
        N_unrolls: X-axis values (number of unrolls of each measurement).
        Exps: Sequence of experiment dicts; the optional ``"name"`` key is
            used as the legend label (falls back to ``"Exp <i>"``).
        skip_idx: Indices of experiments to leave out of the figure.
            ``None`` is treated as "skip nothing".
        ylabel: Label of the y axis. Defaults to ``"Q Error"`` so existing
            calls keep their appearance; pass another label when plotting a
            different metric.
    """
    # Immutable default + set lookup: avoids the shared-mutable-default pitfall.
    skipped = set(skip_idx) if skip_idx is not None else set()

    plt.figure(figsize=(8, 5))

    for i, exp in enumerate(Exps):
        if i in skipped:
            continue
        label = exp.get("name", f"Exp {i}")
        plt.plot(N_unrolls, errs[i], marker='o', label=label)

    plt.xlabel("Number of unrolls")
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def get_optimal_q(max_eval_iters=50, max_epochs=50, use_logger=True, log_every_n_steps=1):
    """Run policy iteration on CliffWalking and return the resulting Q values.

    The returned tensor is what the rest of the notebook passes around as the
    reference ("optimal") Q, ``q_opt``.

    Args:
        max_eval_iters: Forwarded to ``PolicyIterationTrain`` as ``max_eval_iters``.
        max_epochs: Number of epochs the Lightning ``Trainer`` runs.
        use_logger: If True, log the run to wandb (project ``rl-unrolling``,
            group ``GROUP_NAME``); otherwise logging is disabled.
        log_every_n_steps: Forwarded to the ``Trainer``.

    Returns:
        ``model.q`` detached from the autograd graph.
    """
    env = CliffWalkingEnv()
    model = PolicyIterationTrain(env, gamma=0.99, max_eval_iters=max_eval_iters)

    if use_logger:
        logger = WandbLogger(
            project="rl-unrolling",
            name=f"Optimal_pol-{max_eval_iters}eval-{max_epochs}impr",
            group=GROUP_NAME,
        )
    else:
        logger = False

    trainer = Trainer(
        max_epochs=max_epochs,
        log_every_n_steps=log_every_n_steps,
        accelerator='cpu',
        logger=logger,
    )

    try:
        trainer.fit(model, train_dataloaders=None)
    finally:
        # Always close the wandb run, even when fit() fails or is interrupted,
        # so a dangling run does not leak into the next experiment.
        wandb.finish()
    return model.q.detach()

def _make_wandb_logger(enabled, run_name):
    """Return a WandbLogger for ``run_name`` in this notebook's group, or False when disabled."""
    if not enabled:
        return False
    return WandbLogger(project="rl-unrolling", name=run_name, group=GROUP_NAME)


def run(g, N_unrolls, Exps, q_opt, use_logger=True, log_every_n_steps=1, verbose=False,
        unroll_max_epochs=5000):
    """Train every experiment for every unroll setting and collect its errors.

    Args:
        g: Index of the current repetition. wandb logging is only enabled for
            ``g == 0``.
        N_unrolls: NumPy array of unroll counts. For ``"unroll"`` experiments
            the value is passed as ``num_unrolls``; for ``"pol-it"``
            experiments it is used as the trainer's ``max_epochs``.
        Exps: Sequence of dicts with keys ``"model"`` (``"unroll"`` or
            ``"pol-it"``), ``"args"`` (extra model kwargs) and ``"name"``.
        q_opt: Reference Q values passed to ``model.test_pol_err``.
        use_logger: Enable wandb logging (still restricted to ``g == 0``).
        log_every_n_steps: Forwarded to the ``Trainer``.
        verbose: If True, print one summary line per trained model.
        unroll_max_epochs: Training epochs for ``"unroll"`` experiments.

    Returns:
        Tuple ``(err1, err2, bell_err)`` of arrays shaped
        ``(len(Exps), N_unrolls.size)``. ``err1``/``err2`` are the two values
        returned by ``model.test_pol_err(q_opt)``; ``bell_err`` is
        ``model.bellman_error``.

    Raises:
        ValueError: If any experiment declares an unknown ``"model"``.
    """
    # Fail fast: a typo in a late experiment should not surface only after
    # the earlier (potentially very long) trainings have finished.
    unknown = [exp["model"] for exp in Exps if exp["model"] not in ("unroll", "pol-it")]
    if unknown:
        raise ValueError(f"Unknown model: {unknown[0]!r}")

    result_shape = (len(Exps), N_unrolls.size)
    err1 = np.zeros(result_shape)
    err2 = np.zeros(result_shape)
    bell_err = np.zeros(result_shape)

    use_logger = use_logger and g == 0

    for i, n_unrolls in enumerate(N_unrolls):
        n_unrolls = int(n_unrolls)
        for j, exp in enumerate(Exps):
            env = CliffWalkingEnv()

            if exp["model"] == "unroll":
                model = UnrollingPolicyIterationTrain(env=env, env_test=env, num_unrolls=n_unrolls, **exp["args"])
                logger = _make_wandb_logger(use_logger, f"{exp['name']}-{n_unrolls}unrolls")
                trainer = Trainer(max_epochs=unroll_max_epochs, log_every_n_steps=log_every_n_steps,
                                  accelerator="auto", strategy=DDPStrategy(find_unused_parameters=True),
                                  logger=logger)
            else:  # "pol-it" (model types were validated above)
                model = PolicyIterationTrain(env=env, **exp["args"])
                logger = _make_wandb_logger(use_logger, f"{exp['name']}-{n_unrolls}impr")
                trainer = Trainer(max_epochs=n_unrolls, log_every_n_steps=log_every_n_steps,
                                  accelerator='cpu', logger=logger)

            try:
                trainer.fit(model)
            finally:
                # Close the wandb run even if training fails or is interrupted.
                wandb.finish()

            err1[j, i], err2[j, i] = model.test_pol_err(q_opt)
            bell_err[j, i] = model.bellman_error.cpu().numpy()

            if verbose:
                # Single quotes inside the f-string keep this valid before Python 3.12.
                print(f"- {g}. Unrolls {n_unrolls}: Model: {exp['name']} Err1: {err1[j,i]:.3f} | bell_err: {bell_err[j,i]:.3f}")
    return err1, err2, bell_err


## Running different models

In [3]:
# Switches forwarded to the training helpers below.
verbose = True
use_logger = True
log_every_n_steps = 1

# Unroll counts to sweep (a denser alternative is np.arange(1, 16)).
N_unrolls = np.array([1, 3, 5, 7, 9])

# Policy-iteration baselines: (max_eval_iters, run name).
Exps = [
    {"model": "pol-it", "args": {"max_eval_iters": eval_iters}, "name": run_name}
    for eval_iters, run_name in ((1, "val-it"), (10, "pol-it-10eval"), (20, "pol-it-20eval"))
]

# Unrolled models: weight-shared variants first, then the non-shared ones.
# The K=5 variants are currently left out; add 5 to the K tuple to include them.
Exps += [
    {
        "model": "unroll",
        "args": {"K": k, "tau": 5, "lr": 5e-3, "weight_sharing": shared},
        "name": f"unr-K{k}-WS" if shared else f"unr-K{k}",
    }
    for shared in (True, False)
    for k in (10, 20)
]

q_opt = get_optimal_q(use_logger=use_logger, log_every_n_steps=log_every_n_steps)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
[34m[1mwandb[0m: Currently logged in as: [33msamuel-rey[0m ([33msamuel-rey-lab[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/core/optimizer.py:183: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer

  | Name | Type | Params | Mode
-------------------------------------
-------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 21:   0%|          | 0/1 [00:00<?, ?it/s, v_num=qew7, bellman_error=0.000, policy_diff=0.000, q_norm=709.0]          



Epoch 49: 100%|██████████| 1/1 [00:00<00:00, 227.57it/s, v_num=qew7, bellman_error=0.000, policy_diff=0.000, q_norm=709.0]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 1/1 [00:00<00:00, 137.25it/s, v_num=qew7, bellman_error=0.000, policy_diff=0.000, q_norm=709.0]


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
bellman_error,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
policy_diff,█▆▅▅▅▄▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
q_norm,█▄▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
bellman_error,0.0
epoch,49.0
policy_diff,0.0
q_norm,708.56281
trainer/global_step,49.0


In [4]:
# Number of independent repetitions of the whole sweep.
n_runs = 1

# One slice per repetition: (run, experiment, unroll setting).
result_shape = (n_runs, len(Exps), N_unrolls.size)
errs1 = np.zeros(result_shape)
errs2 = np.zeros(result_shape)
bell_errs = np.zeros(result_shape)

t_init = perf_counter()
for g in range(n_runs):
    errs1[g], errs2[g], bell_errs[g] = run(g, N_unrolls, Exps, q_opt, use_logger, log_every_n_steps, verbose)

t_end = perf_counter()
print(f'----- Solved in {(t_end-t_init)/60:.3f} minutes -----')

if SAVE:
    file_name = os.path.join(PATH, "n_unrolls.npz")
    # Create the output directory first so a long sweep is not lost to a
    # missing folder at save time.
    save_dir = os.path.dirname(file_name)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    # bell_errs is stored too: it is plotted later and would otherwise be
    # unrecoverable from the saved file.
    np.savez(file_name, N_unrolls=N_unrolls, Exps=Exps, errs1=errs1, errs2=errs2, bell_errs=bell_errs)
    print("Data saved as:", file_name)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/core/optimizer.py:183: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer

  | Name | Type | Params | Mode
-------------------------------------
-------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 173.77it/s, v_num=agc7, bellman_error=232.0, policy_diff=6.000, q_norm=625.0]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 111.20it/s, v_num=agc7, bellman_error=232.0, policy_diff=6.000, q_norm=625.0]


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
bellman_error,▁
epoch,▁
policy_diff,▁
q_norm,▁
trainer/global_step,▁▁

0,1
bellman_error,232.46497
epoch,0.0
policy_diff,6.0
q_norm,624.61908
trainer/global_step,0.0


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


- 0. Unrolls 1: Model: val-it Err1: 0.030 | bell_err: 232.465


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/core/optimizer.py:183: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer

  | Name | Type | Params | Mode
-------------------------------------
-------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 170.10it/s, v_num=l5dy, bellman_error=124.0, policy_diff=6.000, q_norm=1.84e+3]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 103.80it/s, v_num=l5dy, bellman_error=124.0, policy_diff=6.000, q_norm=1.84e+3]

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.





0,1
bellman_error,▁
epoch,▁
policy_diff,▁
q_norm,▁
trainer/global_step,▁▁

0,1
bellman_error,123.9696
epoch,0.0
policy_diff,6.0
q_norm,1836.20728
trainer/global_step,0.0


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


- 0. Unrolls 1: Model: pol-it-10eval Err1: 3.110 | bell_err: 123.970


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/core/optimizer.py:183: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer

  | Name | Type | Params | Mode
-------------------------------------
-------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 159.64it/s, v_num=1zuv, bellman_error=108.0, policy_diff=6.000, q_norm=2.91e+3]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 97.86it/s, v_num=1zuv, bellman_error=108.0, policy_diff=6.000, q_norm=2.91e+3] 

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.





0,1
bellman_error,▁
epoch,▁
policy_diff,▁
q_norm,▁
trainer/global_step,▁▁

0,1
bellman_error,108.47051
epoch,0.0
policy_diff,6.0
q_norm,2909.38208
trainer/global_step,0.0


Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


- 0. Unrolls 1: Model: pol-it-20eval Err1: 11.145 | bell_err: 108.471


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name       | Type                         | Params | Mode 
--------------------------------------------------------------------
0 | model      | UnrolledPolicyIterationModel | 11     | train
1 | model_test | UnrolledPolicyIterationModel | 11     | train
--------------------------------------------------------------------
22        Trainable params
0         Non-trainable params
22        Total params
0.000     Total estimated model params size (MB)
8         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 4999: 100%|██████████| 1/1 [00:00<00:00, 63.87it/s, v_num=jdgu, reward_smoothness=0.980, bellman_error=23.10, loss=2.780] 

`Trainer.fit` stopped: `max_epochs=5000` reached.


Epoch 4999: 100%|██████████| 1/1 [00:00<00:00, 43.05it/s, v_num=jdgu, reward_smoothness=0.980, bellman_error=23.10, loss=2.780]
P_pi is NOT diagonalizable


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
bellman_error,█▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇█
loss,█▅▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
reward_smoothness,▁▇▇▅█▇▇▇▇▇▇█▇▇▇▇▅▇▅▇▇▇▇█▇▇▇▇▇▇▇▅▇▆▇▇▇▇█▆
trainer/global_step,▁▁▁▁▂▂▂▂▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇▇█████

0,1
bellman_error,23.11369
epoch,4999.0
loss,2.78251
reward_smoothness,0.97973
trainer/global_step,4999.0


Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


- 0. Unrolls 1: Model: unr-K10-WS Err1: 1.593 | bell_err: 23.115


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name       | Type                         | Params | Mode 
--------------------------------------------------------------------
0 | model      | UnrolledPolicyIterationModel | 21     | train
1 | model_test | UnrolledPolicyIterationModel | 21     | train
--------------------------------------------------------------------
42        Trainable params
0         Non-trainable params
42        Total params
0.000     Total estimated model params size (MB)
8         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 4999: 100%|██████████| 1/1 [00:00<00:00, 57.25it/s, v_num=dguv, reward_smoothness=0.980, bellman_error=27.80, loss=4.030] 

`Trainer.fit` stopped: `max_epochs=5000` reached.


Epoch 4999: 100%|██████████| 1/1 [00:00<00:00, 39.89it/s, v_num=dguv, reward_smoothness=0.980, bellman_error=27.80, loss=4.030]
P_pi is NOT diagonalizable


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
bellman_error,██▅▄▃▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
loss,██▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
reward_smoothness,▁▄██████████████████████████████████████
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇██

0,1
bellman_error,27.81977
epoch,4999.0
loss,4.03094
reward_smoothness,0.97973
trainer/global_step,4999.0


Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


- 0. Unrolls 1: Model: unr-K20-WS Err1: 1.139 | bell_err: 27.820


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name       | Type                         | Params | Mode 
--------------------------------------------------------------------
0 | model      | UnrolledPolicyIterationModel | 11     | train
1 | model_test | UnrolledPolicyIterationModel | 11     | train
--------------------------------------------------------------------
22        Trainable params
0         Non-trainable params
22        Total params
0.000     Total estimated model params size (MB)
8         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 4999: 100%|██████████| 1/1 [00:00<00:00, 79.38it/s, v_num=kpvu, reward_smoothness=0.980, bellman_error=22.80, loss=2.720] 

`Trainer.fit` stopped: `max_epochs=5000` reached.


Epoch 4999: 100%|██████████| 1/1 [00:00<00:00, 52.19it/s, v_num=kpvu, reward_smoothness=0.980, bellman_error=22.80, loss=2.720]
P_pi is NOT diagonalizable


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
bellman_error,█▇▆▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇███
loss,█▇▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
reward_smoothness,▁▇██▇▇▇▇▇▇█▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▆▇▇▆▇▇▇▇▇▇▆▇▇
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▇▇▇▇█████

0,1
bellman_error,22.84658
epoch,4999.0
loss,2.71857
reward_smoothness,0.97973
trainer/global_step,4999.0


Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


- 0. Unrolls 1: Model: unr-K10 Err1: 1.593 | bell_err: 22.848


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name       | Type                         | Params | Mode 
--------------------------------------------------------------------
0 | model      | UnrolledPolicyIterationModel | 21     | train
1 | model_test | UnrolledPolicyIterationModel | 21     | train
--------------------------------------------------------------------
42        Trainable params
0         Non-trainable params
42        Total params
0.000     Total estimated model params size (MB)
8         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 4999: 100%|██████████| 1/1 [00:00<00:00, 73.71it/s, v_num=cseg, reward_smoothness=0.980, bellman_error=27.60, loss=3.960] 

`Trainer.fit` stopped: `max_epochs=5000` reached.


Epoch 4999: 100%|██████████| 1/1 [00:00<00:00, 48.29it/s, v_num=cseg, reward_smoothness=0.980, bellman_error=27.60, loss=3.960]
P_pi is NOT diagonalizable


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
bellman_error,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇█
loss,█▇▆▅▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
reward_smoothness,▆▁▂▄████████████████████████████████████
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇███

0,1
bellman_error,27.5831
epoch,4999.0
loss,3.96264
reward_smoothness,0.97973
trainer/global_step,4999.0


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


- 0. Unrolls 1: Model: unr-K20 Err1: 1.139 | bell_err: 27.583


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/core/optimizer.py:183: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer

  | Name | Type | Params | Mode
-------------------------------------
-------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 187.72it/s, v_num=qpt3, bellman_error=13.10, policy_diff=2.000, q_norm=638.0]

`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 113.90it/s, v_num=qpt3, bellman_error=13.10, policy_diff=2.000, q_norm=638.0]

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.





0,1
bellman_error,█▁▁
epoch,▁▅█
policy_diff,█▁▁
q_norm,▁▄█
trainer/global_step,▁▅██

0,1
bellman_error,13.12596
epoch,2.0
policy_diff,2.0
q_norm,637.81061
trainer/global_step,2.0


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


- 0. Unrolls 3: Model: val-it Err1: 0.020 | bell_err: 13.126


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/core/optimizer.py:183: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer

  | Name | Type | Params | Mode
-------------------------------------
-------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 178.16it/s, v_num=yivt, bellman_error=7.530, policy_diff=4.690, q_norm=1.01e+3]

`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 106.09it/s, v_num=yivt, bellman_error=7.530, policy_diff=4.690, q_norm=1.01e+3]

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.





0,1
bellman_error,█▁▁
epoch,▁▅█
policy_diff,█▅▁
q_norm,█▁▁
trainer/global_step,▁▅██

0,1
bellman_error,7.53322
epoch,2.0
policy_diff,4.69042
q_norm,1006.38165
trainer/global_step,2.0


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


- 0. Unrolls 3: Model: pol-it-10eval Err1: 0.336 | bell_err: 7.533


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/core/optimizer.py:183: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer

  | Name | Type | Params | Mode
-------------------------------------
-------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 183.76it/s, v_num=8wvp, bellman_error=1.190, policy_diff=4.690, q_norm=1.67e+3]

`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 112.27it/s, v_num=8wvp, bellman_error=1.190, policy_diff=4.690, q_norm=1.67e+3]

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.





0,1
bellman_error,█▁▁
epoch,▁▅█
policy_diff,█▅▁
q_norm,█▁▁
trainer/global_step,▁▅██

0,1
bellman_error,1.18766
epoch,2.0
policy_diff,4.69042
q_norm,1666.77075
trainer/global_step,2.0


Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


- 0. Unrolls 3: Model: pol-it-20eval Err1: 2.543 | bell_err: 1.188


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name       | Type                         | Params | Mode 
--------------------------------------------------------------------
0 | model      | UnrolledPolicyIterationModel | 11     | train
1 | model_test | UnrolledPolicyIterationModel | 11     | train
--------------------------------------------------------------------
22        Trainable params
0         Non-trainable params
22        Total params
0.000     Total estimated model params size (MB)
16        Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 4999: 100%|██████████| 1/1 [00:00<00:00, 47.09it/s, v_num=47v8, reward_smoothness=0.980, bellman_error=10.70, loss=0.598] 

`Trainer.fit` stopped: `max_epochs=5000` reached.


Epoch 4999: 100%|██████████| 1/1 [00:00<00:00, 36.61it/s, v_num=47v8, reward_smoothness=0.980, bellman_error=10.70, loss=0.598]
P_pi is NOT diagonalizable


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
bellman_error,█▅▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇█
loss,█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
reward_smoothness,▁▅▅▇▇▇▇▅▅▇▇▇▇▇▇▇▇▇▇▇▅▆▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
trainer/global_step,▁▁▁▁▁▂▂▂▃▃▃▃▃▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇███

0,1
bellman_error,10.71898
epoch,4999.0
loss,0.59842
reward_smoothness,0.97973
trainer/global_step,4999.0


Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


- 0. Unrolls 3: Model: unr-K10-WS Err1: 0.000 | bell_err: 10.719


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name       | Type                         | Params | Mode 
--------------------------------------------------------------------
0 | model      | UnrolledPolicyIterationModel | 21     | train
1 | model_test | UnrolledPolicyIterationModel | 21     | train
--------------------------------------------------------------------
42        Trainable params
0         Non-trainable params
42        Total params
0.000     Total estimated model params size (MB)
16        Modules in train mode
0         Modules in eval mode
/home/srey/Investigacion/rl-unrolling/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=167` in the `DataLoader` to improve performance.


Epoch 4046: 100%|██████████| 1/1 [00:00<00:00, 50.97it/s, v_num=ibhm, reward_smoothness=0.980, bellman_error=14.70, loss=1.120] 


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x73fe131d43b0>> (for post_run_cell), with arguments args (<ExecutionResult object at 73fe24b63aa0, execution_count=4 error_before_exec=None error_in_exec=name 'exit' is not defined info=<ExecutionInfo object at 73fe24b600b0, raw_cell="n_runs = 1

errs1 = np.zeros((n_runs, len(Exps), N.." transformed_cell="n_runs = 1

errs1 = np.zeros((n_runs, len(Exps), N.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://ssh-remote%2Beuler-dssp.eif.urjc.es/home/srey/Investigacion/rl-unrolling/influence_unroll.ipynb#X30sdnNjb2RlLXJlbW90ZQ%3D%3D> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

In [None]:
# # Load data
# data = np.load(PATH + "data_v2.npz", allow_pickle=True)
# N_unrolls = data["N_unrolls"]
# Exps = data["Exps"]
# errs1 = data["errs1"]
# errs2 = data["errs2"]

In [None]:
# Experiment indices to leave out of every figure (empty: plot all of them).
skip_idx = []

# First value returned by test_pol_err, averaged over the runs.
mean_errs1 = np.mean(errs1, axis=0)
plot_errors(mean_errs1, N_unrolls, Exps, skip_idx=skip_idx)

# Second value returned by test_pol_err, averaged over the runs.
mean_errs2 = np.mean(errs2, axis=0)
plot_errors(mean_errs2, N_unrolls, Exps, skip_idx=skip_idx)

# Bellman error of the trained models, averaged over the runs.
mean_bell_errs = np.mean(bell_errs, axis=0)
plot_errors(mean_bell_errs, N_unrolls, Exps, skip_idx=skip_idx)
