In [None]:
# Cell 1 - Install (run ONCE)
#
# Installs the system and Python dependencies used by the cells below, then
# activates the Mobiu-Q license from the command line.
# swig is presumably required to build the Box2D bindings pulled in by the
# "gymnasium[box2d]" extra - TODO confirm.
# NOTE(review): gymnasium and stable-baselines3 are pinned but mobiu-q is not;
# consider pinning it too so a re-run installs the same optimizer version.
# NOTE(review): "YOUR_KEY_HERE" is a placeholder; replace it with your own key
# and do not commit the real value with the notebook.

!apt-get -qq update
!apt-get -qq install -y swig > /dev/null
!pip -q install "gymnasium[box2d]==0.29.1" "stable-baselines3==2.3.2" "mobiu-q"
!mobiu-q activate "YOUR_KEY_HERE"

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m374.4/374.4 kB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m953.9/953.9 kB[0m [31m66.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m182.3/182.3 kB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.9/1.9 MB

In [None]:
# Cell 2 - Imports

import gymnasium as gym
import numpy as np
import torch

from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.utils import set_random_seed

from mobiu_q import MobiuOptimizer

# Report the PyTorch build and compute device so recorded results can be tied
# to the runtime that produced them.
print(f"PyTorch: {torch.__version__}")
# Prints the name of CUDA device 0, or the literal 'CPU' when CUDA is unavailable.
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")
print("Imports OK")

PyTorch: 2.9.0+cu126
GPU: NVIDIA L4
Imports OK


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  return datetime.utcnow().replace(tzinfo=utc)


In [None]:
# Cell 3 - SB3-Compatible Mobiu Wrapper
#
# The key insight: SB3 calls optimizer.step() without arguments.
# MobiuOptimizer.step(metric) expects a metric.
#
# Solution: Create a wrapper that:
# 1. Stores the latest episode return (set by callback)
# 2. Passes it to MobiuOptimizer when SB3 calls step()

class SB3MobiuWrapper:
    """Bridges SB3's optimizer.step() with MobiuOptimizer.step(metric).

    SB3 calls ``optimizer.step()`` without arguments, whereas
    ``MobiuOptimizer.step(metric)`` takes a metric. This adapter stores the
    most recent metric passed to :meth:`set_metric` and forwards it on every
    :meth:`step` call. Gradient clearing, ``param_groups``, ``state`` and the
    state-dict methods are delegated to the wrapped base optimizer.

    Args:
        base_optimizer: The optimizer to wrap (handed to ``MobiuOptimizer``).
        method: Forwarded to ``MobiuOptimizer``.
        use_soft_algebra: Forwarded to ``MobiuOptimizer``.
        maximize: Forwarded to ``MobiuOptimizer``.
        sync_interval: Forwarded to ``MobiuOptimizer``.
        verbose: Forwarded to ``MobiuOptimizer``.
        license_key: Mobiu-Q license key. When omitted, the value of the
            ``MOBIU_Q_LICENSE_KEY`` environment variable is used. The key is
            deliberately never embedded in the notebook source.
    """

    # Environment variable consulted when no explicit license key is given.
    LICENSE_ENV_VAR = "MOBIU_Q_LICENSE_KEY"

    def __init__(self, base_optimizer, method="adaptive", use_soft_algebra=True,
                 maximize=True, sync_interval=50, verbose=True, license_key=None):
        import os  # local import: the notebook's import cell does not import os

        if license_key is None:
            # Treat an empty variable the same as an unset one.
            license_key = os.environ.get(self.LICENSE_ENV_VAR) or None

        mobiu_kwargs = dict(
            method=method,
            use_soft_algebra=use_soft_algebra,
            maximize=maximize,
            sync_interval=sync_interval,
            verbose=verbose,
        )
        if license_key is not None:
            mobiu_kwargs["license_key"] = license_key
        # NOTE(review): with no key supplied the argument is omitted, on the
        # assumption that the library falls back to the key stored by
        # `mobiu-q activate` (see the install cell) - confirm against the
        # mobiu-q documentation.
        self.mobiu = MobiuOptimizer(base_optimizer, **mobiu_kwargs)

        self._base_optimizer = base_optimizer
        self._latest_metric = None   # stays None until set_metric() is first called
        self._step_count = 0         # number of step() calls
        self._metric_updates = 0     # number of set_metric() calls

    def step(self, closure=None):
        """Forward one optimizer step to Mobiu with the latest stored metric.

        ``closure`` is accepted for signature compatibility and is ignored.
        """
        self._step_count += 1
        self.mobiu.step(self._latest_metric)

        # DEBUG: report every 1000 steps
        if self._step_count % 1000 == 0:
            lr = self._base_optimizer.param_groups[0]['lr']
            print(f"[DEBUG] step={self._step_count}, metric={self._latest_metric}, LR={lr:.6f}")

    def zero_grad(self, set_to_none=False):
        """Clear gradients on the wrapped base optimizer."""
        self._base_optimizer.zero_grad(set_to_none=set_to_none)

    def set_metric(self, metric):
        """Store the metric that subsequent step() calls will pass to Mobiu."""
        self._latest_metric = metric
        self._metric_updates += 1

        # DEBUG: report the first 10 metric updates, then every 50th
        if self._metric_updates <= 10 or self._metric_updates % 50 == 0:
            lr = self._base_optimizer.param_groups[0]['lr']
            print(f"[DEBUG] set_metric #{self._metric_updates}: metric={metric:.1f}, LR={lr:.6f}")

    def end(self):
        """Print the step/metric totals and close the Mobiu session."""
        print(f"[DEBUG] Total: {self._step_count} steps, {self._metric_updates} metric updates")
        self.mobiu.end()

    def state_dict(self):
        """Return the wrapped optimizer's state dict.

        Needed because SB3 serialises ``policy.optimizer.state_dict()`` when a
        model is saved.
        """
        return self._base_optimizer.state_dict()

    def load_state_dict(self, state_dict):
        """Load a state dict into the wrapped optimizer."""
        return self._base_optimizer.load_state_dict(state_dict)

    @property
    def param_groups(self):
        """Parameter groups of the wrapped base optimizer."""
        return self._base_optimizer.param_groups

    @property
    def state(self):
        """Per-parameter state of the wrapped base optimizer."""
        return self._base_optimizer.state


class MobiuSB3Callback(BaseCallback):
    """
    Callback that routes the policy's optimizer through Mobiu during training.

    1. On training start, replaces ``model.policy.optimizer`` with an
       ``SB3MobiuWrapper`` around the original optimizer.
    2. On every step, reads completed-episode returns from ``infos`` and sets
       the wrapper's metric to the mean of the most recent returns.
    3. On training end, closes the Mobiu session and puts the original
       optimizer back on the policy, so that later calls which touch
       ``policy.optimizer`` (for example saving the model or training again)
       do not hit a wrapper whose session is already closed.

    Args:
        method: Forwarded to ``SB3MobiuWrapper``.
        use_soft_algebra: Forwarded to ``SB3MobiuWrapper``.
        sync_interval: Forwarded to ``SB3MobiuWrapper``.
        verbose: Forwarded to ``BaseCallback``.
    """

    # Number of most recent episode returns averaged into the metric.
    METRIC_WINDOW = 4

    def __init__(self, method="adaptive", use_soft_algebra=True,
                 sync_interval=50, verbose=0):
        super().__init__(verbose=verbose)
        self.method = method
        self.use_soft_algebra = use_soft_algebra
        self.sync_interval = sync_interval
        self._wrapper = None       # set in _on_training_start, cleared in _on_training_end
        self._ep_returns = []      # every completed-episode return seen so far
        self._update_count = 0     # number of metric updates pushed to the wrapper

    def _on_training_start(self) -> None:
        # Get the original optimizer
        base_opt = self.model.policy.optimizer

        # If an earlier run was interrupted before _on_training_end ran, the
        # policy may still hold a wrapper; unwrap it rather than nesting.
        if isinstance(base_opt, SB3MobiuWrapper):
            base_opt = base_opt._base_optimizer

        print(f"[Mobiu] Wrapping optimizer: {type(base_opt).__name__}")
        print(f"[Mobiu] Initial LR: {base_opt.param_groups[0]['lr']:.6f}")

        # Create wrapper
        self._wrapper = SB3MobiuWrapper(
            base_opt,
            method=self.method,
            use_soft_algebra=self.use_soft_algebra,
            maximize=True,
            sync_interval=self.sync_interval,
            verbose=True
        )

        # CRITICAL: Replace SB3's optimizer with our wrapper!
        self.model.policy.optimizer = self._wrapper

        print("[Mobiu] Optimizer replaced successfully")

    def _on_step(self) -> bool:
        # Check for completed episodes
        for info in self.locals.get("infos", []):
            if not isinstance(info, dict):
                continue
            ep_info = info.get("episode")
            if not (isinstance(ep_info, dict) and "r" in ep_info):
                continue

            ep_return = float(ep_info["r"])
            self._ep_returns.append(ep_return)

            if self._wrapper is None:
                continue

            # Rolling average of the most recent episodes; slicing already
            # copes with fewer than METRIC_WINDOW entries.
            mean_return = np.mean(self._ep_returns[-self.METRIC_WINDOW:])
            self._wrapper.set_metric(mean_return)

            self._update_count += 1
            # Only the first five updates are echoed to keep the log short.
            if self._update_count <= 5:
                lr = self._wrapper.param_groups[0]['lr']
                print(f"[Mobiu] Episode {len(self._ep_returns)}: return={ep_return:.1f}, mean={mean_return:.1f}, LR={lr:.6f}")

        # Returning True tells SB3 to keep training.
        return True

    def _on_training_end(self) -> None:
        wrapper = self._wrapper
        if wrapper is None:
            return

        print(f"[Mobiu] Training complete. Total episodes: {len(self._ep_returns)}")
        if self._ep_returns:
            print(f"[Mobiu] Final mean return (last 10): {np.mean(self._ep_returns[-10:]):.1f}")
        try:
            wrapper.end()
        finally:
            # Restore the original optimizer even if closing the session fails.
            if self.model.policy.optimizer is wrapper:
                self.model.policy.optimizer = wrapper._base_optimizer
            self._wrapper = None


print("SB3 Mobiu integration defined")

SB3 Mobiu integration defined


In [None]:
# Cell 4 - Run functions

def run_one(seed: int, env_id: str, total_steps: int, lr: float, use_mobiu: bool):
    """Train one PPO agent and evaluate it.

    Args:
        seed: Seed applied to the global RNGs, the training environment and
            the PPO model; the evaluation environment is reset with
            ``seed + 10_000``.
        env_id: Gymnasium environment id passed to ``gym.make``.
        total_steps: Number of training timesteps passed to ``model.learn``.
        lr: PPO learning rate.
        use_mobiu: When True, training runs with a ``MobiuSB3Callback``;
            otherwise no callback is used.

    Returns:
        ``(mean_reward, std_reward)`` as floats, from 20 deterministic
        evaluation episodes.
    """
    # Local import: Monitor is not part of the notebook's import cell.
    from stable_baselines3.common.monitor import Monitor

    set_random_seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    env = gym.make(env_id)
    eval_env = None
    try:
        env.reset(seed=seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)

        model = PPO(
            "MlpPolicy",
            env,
            learning_rate=lr,
            n_steps=2048,
            batch_size=64,
            n_epochs=10,
            gamma=0.99,
            gae_lambda=0.95,
            clip_range=0.2,
            ent_coef=0.0,
            vf_coef=0.5,
            max_grad_norm=0.5,
            verbose=0,
            seed=seed,
            device="auto",
        )

        # Use our new callback for Mobiu
        cb = MobiuSB3Callback(
            method="adaptive",
            use_soft_algebra=True,
            sync_interval=50,
            verbose=0
        ) if use_mobiu else None

        model.learn(total_timesteps=total_steps, callback=cb)

        # evaluate_policy warns when the evaluation env has no Monitor wrapper,
        # so wrap it explicitly.
        eval_env = Monitor(gym.make(env_id))
        eval_env.reset(seed=seed + 10_000)
        mean_r, std_r = evaluate_policy(model, eval_env, n_eval_episodes=20, deterministic=True)
    finally:
        # Release the environments even when training or evaluation raises.
        env.close()
        if eval_env is not None:
            eval_env.close()

    return float(mean_r), float(std_r)


def run_ab(env_id="CartPole-v1", seeds=(0,1,2), total_steps=100_000, lr=3e-4):
    """Run a paired baseline-vs-Mobiu comparison over several seeds.

    For each seed, ``run_one`` is called twice with identical settings, first
    with ``use_mobiu=False`` and then with ``use_mobiu=True``. Per-seed results
    and a final summary are printed.

    Args:
        env_id: Gymnasium environment id forwarded to ``run_one``.
        seeds: Iterable of integer seeds; must contain at least one seed.
        total_steps: Training timesteps per run.
        lr: Learning rate forwarded to ``run_one``.

    Returns:
        ``(baseline, mobiu, delta)``: three NumPy arrays with one entry per
        seed, holding the baseline mean reward, the Mobiu mean reward, and
        their difference (Mobiu minus baseline).

    Raises:
        ValueError: If ``seeds`` is empty (the summary statistics would be NaN).
    """
    seeds = tuple(seeds)  # materialise once so generators work and can be checked
    if not seeds:
        raise ValueError("run_ab requires at least one seed")

    rule = "=" * 60

    print(f"\n{rule}")
    print(f"ENV: {env_id} | Steps: {total_steps} | LR: {lr}")
    print(f"{rule}\n")

    baseline_results = []
    mobiu_results = []

    for s in seeds:
        print(f"\n{rule}")
        print(f"SEED {s}")
        print(f"{rule}")

        print("\n[Baseline - Adam]")
        m0, sd0 = run_one(s, env_id, total_steps, lr, use_mobiu=False)
        print(f"Baseline result: {m0:.1f} +/- {sd0:.1f}")

        print("\n[Mobiu-Q]")
        m1, sd1 = run_one(s, env_id, total_steps, lr, use_mobiu=True)
        print(f"Mobiu result: {m1:.1f} +/- {sd1:.1f}")

        baseline_results.append(m0)
        mobiu_results.append(m1)

        delta = m1 - m0
        # An exact tie is reported as such rather than credited to either side.
        if delta > 0:
            winner = "Mobiu"
        elif delta < 0:
            winner = "Baseline"
        else:
            winner = "Tie"
        print(f"\n>>> Delta: {delta:+.1f} | Winner: {winner}")

    # Summary
    b0 = np.array(baseline_results)
    b1 = np.array(mobiu_results)
    d = b1 - b0

    print(f"\n{rule}")
    print("FINAL SUMMARY")
    print(f"{rule}")
    print(f"Baseline (Adam): {b0.mean():.2f} +/- {b0.std():.2f}")
    print(f"Mobiu-Q:         {b1.mean():.2f} +/- {b1.std():.2f}")
    print(f"Delta:           {d.mean():+.2f} +/- {d.std():.2f}")
    print(f"Win rate:        {(d > 0).sum()}/{len(d)} ({100*(d>0).mean():.0f}%)")

    mean_delta = d.mean()
    if mean_delta > 0:
        # Relative gain over the baseline mean; the epsilon avoids division by zero.
        pct = 100 * mean_delta / (abs(b0.mean()) + 1e-9)
        print(f"\n\U0001F3C6 Mobiu-Q wins by {pct:.1f}%!")
    elif mean_delta < 0:
        print("\n\U0001F4CA Baseline wins")
    else:
        print("\n\U0001F4CA Tie")

    return b0, b1, d

print("Functions defined")

Functions defined


In [None]:
# Cell 6 - RUN TEST on LunarLander (harder)
#
# Runs the paired baseline-vs-Mobiu comparison on ten seeds (0-9) with
# 200,000 training timesteps per run and keeps the per-seed result arrays
# for later inspection.
# NOTE(review): the cell numbering jumps from 4 to 6, and "harder" reads as a
# comparison with an earlier test that is not in this notebook - confirm that
# no cell is missing.
# NOTE(review): "LunarLander-v3" appears to be registered only from
# Gymnasium 1.0 onward, while Cell 1 pins gymnasium 0.29.1 (which ships
# LunarLander-v2) - confirm that the pins and this env id agree on a fresh
# runtime.

b0_lunar, b1_lunar, d_lunar = run_ab(
    env_id="LunarLander-v3",
    seeds=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
    total_steps=200_000,
    lr=3e-4
)


ENV: LunarLander-v3 | Steps: 200000 | LR: 0.0003


SEED 0

[Baseline - Adam]




Baseline result: 181.1 +/- 49.9

[Mobiu-Q]
[Mobiu] Wrapping optimizer: Adam
[Mobiu] Initial LR: 0.000300
üöÄ Mobiu-Q Hybrid session started (Pro tier) [method=adaptive, base_lr=0.0003, sync=50]
[Mobiu] Optimizer replaced successfully
[DEBUG] set_metric #1: metric=-205.7, LR=0.000300
[Mobiu] Episode 1: return=-205.7, mean=-205.7, LR=0.000300
[DEBUG] set_metric #2: metric=-147.8, LR=0.000300
[Mobiu] Episode 2: return=-89.9, mean=-147.8, LR=0.000300
[DEBUG] set_metric #3: metric=-132.3, LR=0.000300
[Mobiu] Episode 3: return=-101.3, mean=-132.3, LR=0.000300
[DEBUG] set_metric #4: metric=-127.9, LR=0.000300
[Mobiu] Episode 4: return=-114.9, mean=-127.9, LR=0.000300
[DEBUG] set_metric #5: metric=-107.5, LR=0.000300
[Mobiu] Episode 5: return=-123.7, mean=-107.5, LR=0.000300
[DEBUG] set_metric #6: metric=-89.2, LR=0.000300
[DEBUG] set_metric #7: metric=-98.1, LR=0.000300
[DEBUG] set_metric #8: metric=-101.1, LR=0.000300
[DEBUG] set_metric #9: metric=-107.7, LR=0.000300
[DEBUG] set_metric #10: