In [1]:
# Reload imported modules automatically before each cell executes, so edits
# to project code are picked up without restarting the kernel
# (autoreload mode 2 covers all modules, not only those marked with %aimport).
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Switch the working directory to the parent of the directory the kernel
# started in (presumably so project-relative imports and data paths resolve;
# confirm against the repo layout).
#
# A bare os.chdir('..') is not idempotent: every re-run of the cell climbs one
# more level. The starting directory is therefore recorded only the first time
# the cell runs in a kernel session, and the target is always derived from it.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.dirname(NOTEBOOK_DIR))

In [3]:
import wandb
from omegaconf import OmegaConf

### Download the best models from W&B to local storage

---

In [4]:
# Directory that receives one sub-folder per downloaded run.
# The default is the original cluster path; set RL_KINETICS_CKPT_DIR to point
# somewhere else without editing the notebook (e.g. another user or machine).
CKPT_DIR = os.environ.get(
    "RL_KINETICS_CKPT_DIR",
    "/scratch/izar/cizinsky/rl-for-kinetics/best_models",
)

In [5]:
# Create the checkpoint directory (and any missing parents) if it does not
# exist yet. Pure Python replaces the shell escape so a path containing spaces
# or shell metacharacters is handled correctly.
os.makedirs(CKPT_DIR, exist_ok=True)

#### Get overview of the relevant runs

In [None]:
# Query the W&B project once and keep only the runs carrying the "part1" tag.
api = wandb.Api()
runs = api.runs("ludekcizinsky/rl-renaissance")
tagged_runs = list(filter(lambda r: "part1" in r.tags, runs))

# One line per selected run: id, display name and its tags.
for run in tagged_runs:
    summary_line = f"{run.id} | {run.name} | tags: {run.tags}"
    print(summary_line)

#### Download the best model for each run

In [None]:
# For every "part1" run: save its config as YAML and download the v0 model
# artifact into CKPT_DIR/<run name>/.
run_ids = [run.id for run in tagged_runs]

# Iterate over the run objects directly; tagged_runs already holds them, so
# there is no need to search `runs` again for each id.
for run in tagged_runs:
    print(f"Downloading checkpoint for run {run.id}...")

    artifact_path = f"ludekcizinsky/rl-renaissance/{run.name}:v0"
    print(artifact_path)
    download_path = f"{CKPT_DIR}/{run.name}"

    # Decide whether the checkpoint is already present BEFORE creating the
    # directory. Checking the directory after os.makedirs() is always true,
    # which made the loop skip every download. policy.pt is used as the
    # marker because it is the file the inference cells load; a directory
    # holding only config.yaml therefore still triggers a download.
    already_downloaded = os.path.exists(f"{download_path}/policy.pt")
    os.makedirs(download_path, exist_ok=True)

    # Config
    run_cfg = OmegaConf.create(run.config)
    OmegaConf.save(run_cfg, f"{download_path}/config.yaml")

    # Checkpoints
    if already_downloaded:
        print(f"Checkpoint for run {run.name} already exists in {download_path}.")
        continue
    # Resolve the artifact only when it is actually needed (network call).
    artifact = api.artifact(artifact_path, type="model")
    artifact.download(download_path)
    print(f"Downloaded checkpoint to {download_path}.")

### Inference

---

```bash
apptainer shell --nv --bind "$(pwd)":/home/renaissance/work --bind "/scratch/izar/$USER/rl-for-kinetics/output:/home/renaissance/output" --bind "/scratch/izar/$USER/rl-for-kinetics/best_models:/home/renaissance/best_models" /scratch/izar/$USER/images/renaissance_with_ml.sif
```

```bash
jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser
```

In [4]:
# List the run directories available under the best_models path
# (the path the apptainer command above binds the scratch best_models folder to).
!ls /home/renaissance/best_models

celestial-dragon-134  fragrant-plasma-133  sandy-blaze-130     vibrant-oath-142
chocolate-sponge-135  hearty-pine-137	   stilted-lake-138
comfy-terrain-128     olive-forest-140	   sweet-eon-143
crisp-sunset-141      resilient-oath-127   upbeat-thunder-139


In [5]:
# Show the files stored for a single run.
!ls /home/renaissance/best_models/resilient-oath-127

config.yaml  policy.pt	value.pt


In [6]:
import torch
from helpers.utils import setup_kinetic_env, get_incidence_rate
from helpers.ppo_agent import PolicyNetwork

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-jo7x88oq because the default path (/scratch/izar/cizinsky/.cache/hf/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [7]:
# Choose the run to evaluate and derive the locations of its policy weights
# and saved config from the run's directory.
selected_model = "resilient-oath-127"
model_dir = f"/home/renaissance/best_models/{selected_model}"
model_path = f"{model_dir}/policy.pt"
config_path = f"{model_dir}/config.yaml"

In [8]:
# Load the selected run's saved config.yaml; cfg is passed to both the
# environment setup and the PolicyNetwork constructor below.
cfg = OmegaConf.load(config_path)

In [9]:
# Build the kinetic environment from the run config. As the cell output shows,
# the call prints the config and a notice that kinetic and thermodynamic data
# are being loaded.
kinetic_env = setup_kinetic_env(cfg)

--------------------------------------------------
env:
  p_size: 384
  action_scale: 1
seed: 42
paths:
  names_km: data/varma_ecoli_shikki/parameter_names_km_fdp1.pkl
  output_dir: /home/renaissance/output
  met_model_name: varma_ecoli_shikki
device: cpu
logger:
  tags:
  - baseline
  entity: ludekcizinsky
  project: rl-renaissance
method:
  name: ppo_refinement
  actor_lr: 0.0003
  clip_eps: 0.2
  critic_lr: 0.001
  gae_lambda: 0.98
  parameter_dim: 384
  discount_factor: 0.99
  value_loss_weight: 0.5
reward:
  eig_partition: -2.5
training:
  batch_size: 25
  num_epochs: 10
  num_episodes: 100
  max_grad_norm: 0.5
  save_trained_models: true
  max_steps_per_episode: 50
  n_eval_samples_in_episode: 50
launch_cmd: train.py
constraints:
  max_km: 3
  min_km: -25
  ss_idx: 1712
lr_scheduler:
  name: constant

--------------------------------------------------
FYI: Loading kinetic and thermodynamic data.


In [10]:
# Switch off the environment's logging flag for this evaluation session.
# NOTE(review): what exactly gets logged when the flag is on is defined in the
# environment implementation, which is not visible here - confirm.
kinetic_env.logging_enabled = False

In [11]:
# Device the policy network is moved to and that is passed to the evaluation
# helper below (the run config printed above also lists `device: cpu`).
device = "cpu"

In [12]:
# Instantiate the policy from the run config and restore the trained weights.
policy_net = PolicyNetwork(cfg)
policy_net.load_pretrained_policy_net(model_path)

# Inference mode, then move to the target device; the final expression is
# what the cell displays.
policy_net.eval().to(device)

FYI: Loaded pretrained policy network from /home/renaissance/best_models/resilient-oath-127/policy.pt.


PolicyNetwork(
  (base): Sequential(
    (0): Linear(in_features=384, out_features=1536, bias=True)
    (1): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
    (2): ReLU()
    (3): Linear(in_features=1536, out_features=1536, bias=True)
    (4): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
    (5): ReLU()
  )
  (mean_head): Sequential(
    (0): Linear(in_features=1536, out_features=384, bias=True)
    (1): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
  )
  (log_std_head): Sequential(
    (0): Linear(in_features=1536, out_features=384, bias=True)
    (1): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
  )
)

In [17]:
# Repeat the incidence-rate evaluation ten times with identical settings and
# print the rate obtained in each repetition.
num_samples = 50
max_steps_per_episode = 10 # cfg.training.max_steps_per_episode

# Positional arguments shared by every evaluation call.
eval_args = (kinetic_env, policy_net, max_steps_per_episode, num_samples, device)

for i in range(10):
    eval_result = get_incidence_rate(*eval_args)
    incidence_rate, all_max_eigs = eval_result
    print(f"[Eval {i}] Incidence rate: {incidence_rate}")

Getting best setup:   0%|          | 0/30 [00:00<?, ?it/s]

Getting best setup: 100%|██████████| 30/30 [00:08<00:00,  3.50it/s]
Evaluating best setup: 100%|██████████| 100/100 [00:27<00:00,  3.62it/s]


[Eval 0] Incidence rate: 0.0


Getting best setup: 100%|██████████| 30/30 [00:08<00:00,  3.51it/s]
Evaluating best setup: 100%|██████████| 100/100 [00:27<00:00,  3.62it/s]


[Eval 1] Incidence rate: 0.0


Getting best setup: 100%|██████████| 30/30 [00:08<00:00,  3.53it/s]
Evaluating best setup: 100%|██████████| 100/100 [00:27<00:00,  3.62it/s]


[Eval 2] Incidence rate: 0.0


Getting best setup: 100%|██████████| 30/30 [00:08<00:00,  3.54it/s]
Evaluating best setup: 100%|██████████| 100/100 [00:27<00:00,  3.62it/s]


[Eval 3] Incidence rate: 0.0


Getting best setup: 100%|██████████| 30/30 [00:08<00:00,  3.52it/s]
Evaluating best setup: 100%|██████████| 100/100 [00:28<00:00,  3.57it/s]


[Eval 4] Incidence rate: 0.66


Getting best setup: 100%|██████████| 30/30 [00:08<00:00,  3.48it/s]
Evaluating best setup: 100%|██████████| 100/100 [00:27<00:00,  3.59it/s]


[Eval 5] Incidence rate: 0.0


Getting best setup: 100%|██████████| 30/30 [00:08<00:00,  3.50it/s]
Evaluating best setup: 100%|██████████| 100/100 [00:28<00:00,  3.56it/s]


[Eval 6] Incidence rate: 0.58


Getting best setup: 100%|██████████| 30/30 [00:08<00:00,  3.50it/s]
Evaluating best setup: 100%|██████████| 100/100 [00:27<00:00,  3.59it/s]


[Eval 7] Incidence rate: 0.18


Getting best setup: 100%|██████████| 30/30 [00:08<00:00,  3.49it/s]
Evaluating best setup: 100%|██████████| 100/100 [00:27<00:00,  3.58it/s]


[Eval 8] Incidence rate: 0.91


Getting best setup: 100%|██████████| 30/30 [00:08<00:00,  3.50it/s]
Evaluating best setup: 100%|██████████| 100/100 [00:27<00:00,  3.59it/s]

[Eval 9] Incidence rate: 0.49



