In [5]:
from functools import partial

import hydra
from omegaconf import DictConfig, OmegaConf

from renaissance.kinetics.jacobian_solver import check_jacobian

from helpers.ppo_agent import PPOAgent
from helpers.env import KineticEnv
from helpers.utils import reward_func, load_pkl

import logging
from hydra import initialize, compose
# Initialise Hydra's compose API once, pointing it at the local "configs" directory.
# Hydra keeps a single global instance: a second initialize() call (the former bare
# `hydra.initialize()`) is rejected as "GlobalHydra is already initialized" and, on a
# cleared state, would replace this setup with one that has no config path.
initialize(config_path="configs", version_base="1.1")

In [10]:
# Compose the run configuration from "train.yaml" via Hydra's compose API.
# NOTE(review): relies on initialize(...) having been executed earlier in this kernel.
cfg = compose(config_name="train.yaml")

In [7]:
# Echo the composed configuration between two horizontal rules for a visual check.
rule = "-" * 50
print(rule)
print(OmegaConf.to_yaml(cfg))
print(rule)

# Build the SKimPy-backed Jacobian checker first, then disable logging up to and
# including CRITICAL so the loading steps below stay quiet.
chk_jcbn = check_jacobian()
logging.disable(logging.CRITICAL)

# Load the kinetic/thermodynamic models, then the steady-state profile selected by
# cfg.constraints.ss_idx, for the configured metabolic model.
print("FYI: Loading kinetic and thermodynamic data.")
met_model = cfg.paths.met_model_name
chk_jcbn._load_ktmodels(met_model, 'fdp1')
chk_jcbn._load_ssprofile(met_model, 'fdp1', cfg.constraints.ss_idx)

# TODO: logger is disabled for now; get_logger(cfg) would otherwise hand back a
# W&B run object.
logger = None


--------------------------------------------------
method:
  parameter_dim: 384
  latent_dim: 99
  name: ppo_refinement
  actor_lr: 0.0003
  critic_lr: 0.001
  discount_factor: 0.99
  gae_lambda: 0.98
  clip_eps: 0.2
  value_loss_weight: 0.5
  entropy_loss_weight: 0.0
seed: 42
device: cpu
logger:
  project: rl-renaissance
  entity: ludekcizinsky
  tags:
  - dev
paths:
  names_km: data/varma_ecoli_shikki/parameter_names_km_fdp1.pkl
  output_dir: /home/renaissance/output
  met_model_name: varma_ecoli_shikki
constraints:
  min_km: -25
  max_km: 3
  ss_idx: 1712
reward:
  eig_partition: -2.5
env:
  p0_init_mean: 0
  p0_init_std: 0.1
  p_size: 384
training:
  num_episodes: 100
  max_steps_per_episode: 50
  batch_size: 25
  num_epochs: 10
  max_grad_norm: 0.5

--------------------------------------------------
FYI: Loading kinetic and thermodynamic data.


In [None]:
# Set up the kinetic environment.
# The reward callable is reward_func with the Jacobian checker, the Km parameter
# names loaded from cfg.paths.names_km, and cfg.reward.eig_partition pre-bound.
names_km = load_pkl(cfg.paths.names_km)
reward_fn = partial(
    reward_func,
    chk_jcbn,
    names_km,
    cfg.reward.eig_partition,
)

# Create the environment and seed it from the config.
env = KineticEnv(cfg, reward_fn)
env.seed(cfg.seed)

In [11]:
# Display the configured output directory for inspection.
# NOTE(review): the recorded output of this cell differs from the output_dir in the
# printed configuration above — re-run from a fresh kernel to confirm which is current.
cfg.paths.output_dir

'/home/renaissance/work/output'

In [13]:
import os

import numpy as np

# Write a small test array into the configured output directory.
# The path is built with os.path.join: plain string concatenation dropped the
# separator and wrote "<output_dir>lol.npy" next to the directory instead of inside it.
os.makedirs(cfg.paths.output_dir, exist_ok=True)  # np.save does not create directories
np.save(os.path.join(cfg.paths.output_dir, 'lol.npy'), [1, 2, 3])

In [None]:
# Initialize PPO agent (actor and critic)
ppo_agent = PPOAgent(cfg, logger)

# Per-episode histories; each list receives one entry per training episode.
collect_rewards = []
collect_policy_loss = []
collect_value_loss = []

# Training loop: collect one trajectory with the current policy, report its reward
# statistics, then run a PPO update on that trajectory and report the losses.
for episode in range(cfg.training.num_episodes):
    trajectory = ppo_agent.collect_trajectory(env)
    rewards = trajectory["rewards"]
    collect_rewards.append(rewards)
    min_rew, max_rew, mean_rew = rewards.min(), rewards.max(), rewards.mean()
    print(f"Episode {episode+1}/{cfg.training.num_episodes} - Min reward: {min_rew:.4f}, Max reward: {max_rew:.4f}, Mean reward: {mean_rew:.4f}, Last reward: {rewards[-1]:.4f}")

    policy_loss, value_loss, entropy = ppo_agent.update(trajectory)
    # Record the losses alongside the rewards; previously these two history lists
    # were created but never filled.
    collect_policy_loss.append(policy_loss)
    collect_value_loss.append(value_loss)
    print(f"Episode {episode+1}/{cfg.training.num_episodes} - Policy loss: {policy_loss:.4f}, Value loss: {value_loss:.4f}, Entropy: {entropy:.4f}")
