In [153]:
%load_ext autoreload
%autoreload 2
from rectangle import Rectangle
from environment import RectangleEnv
from optimizer import StepOptimizer
from algorithms import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
from sanity_tests import *
from example_generator import make_cases, build_envs_from_cases
import numpy as np

In [5]:
if __name__ == "__main__":
    # Smoke tests for the building blocks, run before any training cell.
    # In a notebook kernel __name__ is "__main__", so this guard is always
    # true when the cell is executed.
    # NOTE(review): the helpers called here are presumably provided by the
    # star-import of `sanity_tests` — confirm, since `algorithms` is
    # star-imported as well.

    # Sanity check for the Rectangle class (distance computation, judging by
    # the helper's name).
    test_rectangle_distance()
    # Sanity check for the step optimizer.
    sanity_check_step_optimizer()

    # Build an environment for the checks below and draw its initial state.
    env = build_env()
    env.render()

    # 1) state invariants on reset
    env.reset()
    check_encode_state_invariants(env)

    # 2) axis isolation (uses apply_action only)
    check_axis_isolation(env)

    # 3) rollout that prints distances, rewards, and invariants
    rollout_and_print(env, steps=18)

    # 4) terminal checks
    check_terminal_conditions(env)

    # Closing hint for whoever reads the printed output of the checks above.
    print("\n[done] If something looks off (e.g., reward != expected), tweak compute_reward or the alpha/beta used.")

In [18]:
# rect_params = dict(center=(0, 0), width=2.0, height=1.0, theta=np.pi/6)
# optimizer_params = dict(tau=0.1, loss="soft_l1", bounds=(-5, 5))
# offsetX, offsetY = np.random.uniform(-2, 3), np.random.uniform(-2, 3)
# rect = Rectangle(**rect_params)
# import matplotlib.pyplot as plt
# fig, ax = plt.subplots()
# rect.plot(ax=ax, color='blue', alpha=0.5)
# rect.move(dx=offsetX, dy=offsetY)
# points = rect.sample_points(num_points=10, jitter=0.0, rng=np.random.default_rng(42)) 
# ax.scatter(points[:, 0], points[:, 1], color='red', label='Sampled Points')
# env = RectangleEnv(rect_params=rect_params, optimizer_params=optimizer_params, points=points)

In [97]:
def run_algo(algo_name, example_name, env = RectangleEnv, episodes=500, **kwargs):
    """
    Train a tabular agent on ``env``, replay one rendered episode, save a GIF.

    Parameters
    ----------
    algo_name : str
        ``'sarsa'`` selects ``train_sarsa``; ``'q'`` selects ``train_q_learning``.
    example_name : str
        Label used only to build the output GIF filename.
    env :
        Environment handed to the trainer, to ``run_episode`` and asked to
        produce the GIF.
        NOTE(review): the default is the ``RectangleEnv`` class itself, not an
        instance; every call in this notebook passes an instance explicitly —
        confirm whether the default is ever meant to be used.
    episodes : int
        Forwarded to the trainer as ``episodes``.
    **kwargs
        Forwarded unchanged to the trainer.

    Raises
    ------
    ValueError
        If ``algo_name`` is neither ``'sarsa'`` nor ``'q'``; raised before any
        training starts.

    Returns
    -------
    None
        Results are shown via ``display``/``print`` and the GIF is written to
        ``gifs_<algo_name>/<example_name>_animation.gif``.
    """
    # Reject unknown algorithm names up front.
    if algo_name not in ('sarsa', 'q'):
        raise ValueError(f"Unknown algorithm: {algo_name}")

    # Only the selected trainer name is looked up.
    trainer = train_sarsa if algo_name == 'sarsa' else train_q_learning
    Q, _training_logs = trainer(env, episodes=episodes, **kwargs)

    # One rendered evaluation episode with the learned table.
    metrics = run_episode(env, Q, render=True)
    display(env.log)
    print("greedy eval:", metrics)

    gif_path = f"gifs_{algo_name}/{example_name}_animation.gif"
    env.show_gif(filename=gif_path)

In [145]:
# Training settings shared by every run_algo(...) call below; they are
# forwarded to the selected trainer as keyword arguments.
params = dict(
    episodes=500,
    alpha=0.1,
    gamma=1,
    eps_start=0.2,
    eps_end=0.01,
    eps_decay=0.995,
    seed=0,
)

# Keyword arguments describing the rectangle and the optimizer; both are
# handed to make_cases to generate the example cases.
rect_params = {
    "center": (0, 0),
    "width": 2.0,
    "height": 1.0,
    "theta": np.pi / 6,
}
optimizer_params = {
    "tau": 0.1,
    "loss": "soft_l1",
    "bounds": (-5, 5),
}

# Generate the cases, then build the environments from them. `envs` is used
# as a mapping (name -> environment) by the cells that follow.
cases = make_cases(rect_params, optimizer_params, num_points=20, jitter=0.01)
envs = build_envs_from_cases(cases)

In [1]:
# Single-case run: take the last (name, env) pair in `envs` and train SARSA
# on it. `name` and `env` stay notebook-level names; later cells read `env`.
name, env = [*envs.items()][-1]
run_algo('sarsa', name, env, **params)


In [2]:
# SARSA on every case, each run framed by a header and footer line.
# The items are snapshotted first, so the loop iterates over a fixed sequence.
# The loop variables `name`/`env` remain bound after the loop and are read
# by a later cell.
for name, env in tuple(envs.items()):
    print("-----------------{}------------------".format(name))
    run_algo("sarsa", name, env, **params)
    print("-----------------------------------------", end="\n\n")

In [3]:
# Q-learning on every case, each run framed by a header and footer line.
# The loop variables `name`/`env` remain bound after the loop and are read
# by a later cell.
for name, env in envs.items():
    print("-----------------{}------------------".format(name))
    run_algo("q", name, env, **params)
    print("-----------------------------------------", end="\n\n")

In [4]:
# Stand-alone SARSA run with training diagnostics.
#
# Hidden-state warning: this cell reads two notebook-level names set by
# earlier cells — `env` (whatever case was bound last, e.g. by the loops over
# `envs`) and `params` (the shared hyperparameter dict). Run those cells first.
#
# from algorithms import train_sarsa, train_q_learning, run_episode

# Train SARSA with the shared hyperparameters rather than a second literal
# copy of them, so this cell cannot drift from the run_algo(...) runs.
Q, logs = train_sarsa(env, **params)
# Q-learning alternative:
# Q, logs = train_q_learning(env, episodes=400, alpha=0.1, gamma=1,
#                       eps_start=0.2, eps_end=0.01, eps_decay=0.995, seed=0)

# Inspect a few metrics over the tail of training.
last_few_logs = logs[-5:]
success_window = logs[-50:]
print("last 5 episode returns:", [d['return'] for d in last_few_logs])
print("last 5 iters used:    ", [d['iters'] for d in last_few_logs])
# Print the real window length: slicing clamps, so with fewer than 50 logged
# episodes a hard-coded "/ last 50" would misstate the denominator.
print("success ratio:", sum(d['solved'] for d in success_window), "/ last", len(success_window))

# Evaluate/render one episode with the learned table and print its metrics.
metrics = run_episode(env, Q, render=True)
print("greedy eval:", metrics)