# A: CARBS hyperparameter search for PQN training

In [None]:
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import jax
from flax import nnx
import optax

from src.rts.config import EnvConfig, RewardConfig
from src.rl.pqn import Params, train_minibatched, Model
from src.rl.eval import evaluate_batch

from carbs import CARBS, CARBSParams, ObservationInParam, Param, LogSpace, LogitSpace, LinearSpace

import mlflow

In [None]:
# Board dimensions. They are kept as separate names because later cells also
# use them to size the Q-network's input and output layers.
width, height = 10, 10

# Environment settings passed to every training and evaluation call below.
config = EnvConfig(
    num_players=2,
    board_width=width,
    board_height=height,
    num_neutral_bases=3,
    num_neutral_troops_start=5,
    neutral_troops_min=4,
    neutral_troops_max=10,
    player_start_troops=5,
    bonus_time=10,
    reward_config=RewardConfig(),
)

In [None]:
# CARBS search space, one row per tuned hyperparameter:
#   (name, search space, search_center)
# The names must match the keys read from each suggestion when building
# `Params` in the tuning loops.
_search_space_rows = [
    ("num_iterations", LinearSpace(is_integer=True, min=1, max=5000, scale=100), 40),
    ("lr", LogSpace(scale=0.5), 4e-4),
    ("gamma", LogitSpace(), 0.99),
    ("q_lambda", LogitSpace(), 0.92),
    ("num_envs", LinearSpace(is_integer=True, min=1, max=5000, scale=100), 200),
    ("update_epochs", LinearSpace(is_integer=True, min=1, max=8, scale=4), 1),
    ("num_minibatches", LinearSpace(is_integer=True, min=1, max=16, scale=8), 4),
    ("epsilon", LogitSpace(), 0.3),
]

param_spaces = [
    Param(name=row_name, space=row_space, search_center=row_center)
    for row_name, row_space, row_center in _search_space_rows
]

# Optimiser-level settings: wandb logging is switched off.
# NOTE(review): better_direction_sign=1 presumably means "maximise output" and
# resample_frequency=0 presumably disables re-sampling; confirm against CARBS docs.
carbs_params = CARBSParams(
    better_direction_sign=1,
    is_wandb_logging_enabled=False,
    resample_frequency=0,
)
carbs = CARBS(carbs_params, param_spaces)

In [None]:
# Fixed-budget CARBS sweep. Each pass asks CARBS for a suggestion, trains a
# fresh Q-network with it, evaluates the trained network, reports
# (output, cost) back to CARBS and appends the trial to `history`.
history = []
num_runs = 50
for _ in range(num_runs):
    suggestion = carbs.suggest().suggestion

    # num_steps is held fixed at 250; every other field comes from CARBS.
    params = Params(
        num_iterations=suggestion["num_iterations"],
        lr=suggestion["lr"],
        gamma=suggestion["gamma"],
        q_lambda=suggestion["q_lambda"],
        num_envs=suggestion["num_envs"],
        num_steps=250,
        update_epochs=suggestion["update_epochs"],
        num_minibatches=suggestion["num_minibatches"],
        epsilon=suggestion["epsilon"],
    )

    # A new network and optimizer are built for every trial, always seeded with 0.
    q_net = Model(width * height * 4, 512, width * height * 4, rngs=nnx.Rngs(0))
    optimizer = nnx.Optimizer(q_net, optax.adam(params.lr))

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way time.time() can.
    pre_time = time.perf_counter()
    q_net, losses, cum_returns = train_minibatched(q_net, optimizer, config, params)
    # JAX dispatches device work asynchronously: without forcing completion the
    # timer could stop before training has actually finished, under-reporting
    # the cost that CARBS optimises against. Non-array leaves are passed through.
    jax.block_until_ready((losses, cum_returns))
    time_used = time.perf_counter() - pre_time

    # Evaluation always uses the same PRNG key (0), batch size and step count.
    results = evaluate_batch(
        q_net, config, jax.random.PRNGKey(0), batch_size=10, num_steps=250
    )
    # Store a plain Python float rather than a NumPy scalar so that CARBS and
    # the DataFrame built from `history` both receive an ordinary number.
    output = float(np.mean(results))

    obs_out = carbs.observe(
        ObservationInParam(input=suggestion, output=output, cost=time_used)
    )
    history.append({
        **suggestion,
        "output": output,
        "cost": time_used,
    })

In [None]:
# One row per recorded trial: the suggested hyperparameters plus "output" and "cost".
df = pd.DataFrame(history)

# Running maximum of the evaluation output, in trial order.
best_over_time = df["output"].cummax()

fig_best, ax_best = plt.subplots()
ax_best.plot(best_over_time)
ax_best.set_title("Best Performance Over Iterations")
ax_best.set_xlabel("Iteration")
ax_best.set_ylabel("Best Output")
plt.show()

# Training wall time against evaluation output, one point per trial.
fig_cost, ax_cost = plt.subplots()
ax_cost.scatter(df["cost"], df["output"])
ax_cost.set_title("Cost vs. Performance")
ax_cost.set_xlabel("Time Used (s)")
ax_cost.set_ylabel("Output")
plt.show()

In [None]:
# Open-ended CARBS search tracked in MLflow. Every trial becomes one MLflow run
# under the "carbs_indefinite" experiment, with the suggestion logged as
# parameters and the evaluation output / runtime logged as metrics.
mlflow.set_experiment("carbs_indefinite")

iter_no = 0
# No exit condition: this keeps requesting and evaluating suggestions until the
# cell is interrupted or an exception is raised.
while True:
    iter_no += 1
    suggestion = carbs.suggest().suggestion

    # num_steps is held fixed at 250; every other field comes from CARBS.
    params = Params(
        num_iterations=suggestion["num_iterations"],
        lr=suggestion["lr"],
        gamma=suggestion["gamma"],
        q_lambda=suggestion["q_lambda"],
        num_envs=suggestion["num_envs"],
        num_steps=250,
        update_epochs=suggestion["update_epochs"],
        num_minibatches=suggestion["num_minibatches"],
        epsilon=suggestion["epsilon"],
    )

    with mlflow.start_run():
        mlflow.log_params(suggestion)

        # perf_counter is monotonic, so the interval is immune to wall-clock
        # adjustments. The timed span covers model construction and training.
        t0 = time.perf_counter()
        q_net = Model(width * height * 4, 512, width * height * 4, rngs=nnx.Rngs(0))
        opt = nnx.Optimizer(q_net, optax.adam(params.lr))
        q_net, *train_outputs = train_minibatched(q_net, opt, config, params)
        # JAX dispatches device work asynchronously: wait for the training
        # outputs so `runtime` reflects finished work, not just dispatch time.
        # Non-array leaves are passed through unchanged.
        jax.block_until_ready(train_outputs)
        runtime = time.perf_counter() - t0

        # Evaluation always uses the same PRNG key (0), batch size and step count.
        output = float(np.mean(
            evaluate_batch(q_net, config, jax.random.PRNGKey(0),
                           batch_size=10, num_steps=250)
        ))

        carbs.observe(ObservationInParam(
            input=suggestion, output=output, cost=runtime))

        # Metrics are stepped by the trial counter, so each run carries its
        # position in the search.
        mlflow.log_metric("output", output, step=iter_no)
        mlflow.log_metric("cost_sec", runtime, step=iter_no)

In [None]:
# Show the raw trial records (suggested hyperparameters, "output", "cost")
# collected by the fixed-length tuning loop.
history