
# LunarLander — REINFORCE (Demo Notebook)

This notebook **calls the functions from your code** in
`src/reinforcement_learning/lunar_lander/` to train a simple REINFORCE agent
and **show the results** (reward curve, quick stats).

> Expected repo layout:
> `src/reinforcement_learning/lunar_lander/{train.py, eval.py, models.py, reinforce.py, utils.py}`  
> Figures are saved under `docs/images/` by `train.py`.


In [6]:
# ==== Sanity cell (first cell of the notebook) ====
import os, sys, importlib.util, platform

# 1) Make the environment's DLLs loadable (needed on Windows for pygame/SDL2).
dll_dir = os.path.join(sys.exec_prefix, "Library", "bin")
if os.name == "nt" and os.path.isdir(dll_dir):
    os.add_dll_directory(dll_dir)

# 2) Environment log.
print("OS:", platform.platform())
print("Python exe:", sys.executable)

# Check whether any PATH entry mentions Library\bin. Entries are split on the
# platform's own separator and compared after os.path.normcase, because Windows
# paths are case-insensitive and may be written with forward slashes; a plain
# case-sensitive substring test would report such entries as missing.
library_bin_in_path = any(
    os.path.normcase("Library\\bin") in os.path.normcase(entry)
    for entry in os.environ.get("PATH", "").split(os.pathsep)
)
print("Library\\bin in PATH:", library_bin_in_path)

# Threading / OpenMP related variables (printed as None when unset).
print("KMP_DUPLICATE_LIB_OK:", os.environ.get("KMP_DUPLICATE_LIB_OK"))
print("OMP_NUM_THREADS:", os.environ.get("OMP_NUM_THREADS"))
print("MKL_NUM_THREADS:", os.environ.get("MKL_NUM_THREADS"))
# 3) Dove verrebbero caricati i moduli (path)
def spec_path(name):
    """Return where module ``name`` would be loaded from, or ``None``.

    Args:
        name: Module name to look up, e.g. ``"torch"``.

    Returns:
        The ``origin`` attribute of the spec returned by
        ``importlib.util.find_spec`` (``None`` if the spec has no origin),
        or ``None`` when no spec can be found for ``name``.
    """
    try:
        spec = importlib.util.find_spec(name)
    except (ImportError, ValueError):
        # find_spec raises rather than returning None when a dotted name's
        # parent package is missing (ModuleNotFoundError) or when an already
        # imported module has __spec__ set to None (ValueError). This helper
        # is purely diagnostic, so report "not found" instead of crashing.
        return None
    return getattr(spec, "origin", None) if spec else None

# Report the location each required module would be loaded from.
for spec_label, module_name in (
    ("Spec pygame   ->", "pygame"),
    ("Spec gymnasium->", "gymnasium"),
    ("Spec Box2D    ->", "Box2D"),
    ("Spec torch    ->", "torch"),
):
    print(spec_label, spec_path(module_name))

# 4) "Hard" imports with explicit errors: any failure prints the traceback
#    and stops the cell with a troubleshooting hint.
try:
    import pygame
    import gymnasium as gym
    import Box2D
    import torch

    print(f"VERSIONS | pygame {pygame.__version__} | gym {gym.__version__} | torch {torch.__version__}")
    print("Torch threads:", torch.get_num_threads())
except Exception:
    import traceback

    traceback.print_exc()
    failure_hint = (
        "\n[FAIL] Import fallito. Se sei su Windows:\n"
        "- Assicurati di aprire Jupyter col tuo start_lab.bat (env vars + DLL PATH)\n"
        "- Kernel deve essere 'Python (ml-foundations)'\n"
        "- pygame/gym/Box2D devono puntare all'env ml-foundations nelle 'Spec' sopra.\n"
    )
    raise SystemExit(failure_hint)


OS: Windows-10-10.0.22621-SP0
Python exe: C:\Users\mtll\AppData\Local\anaconda3\envs\ml-foundations\python.exe
Library\bin in PATH: True
KMP_DUPLICATE_LIB_OK: TRUE
OMP_NUM_THREADS: 1
MKL_NUM_THREADS: 1
Spec pygame   -> C:\Users\mtll\AppData\Local\anaconda3\envs\ml-foundations\Lib\site-packages\pygame\__init__.py
Spec gymnasium-> C:\Users\mtll\AppData\Local\anaconda3\envs\ml-foundations\Lib\site-packages\gymnasium\__init__.py
Spec Box2D    -> C:\Users\mtll\AppData\Local\anaconda3\envs\ml-foundations\Lib\site-packages\Box2D\__init__.py
Spec torch    -> C:\Users\mtll\AppData\Local\anaconda3\envs\ml-foundations\Lib\site-packages\torch\__init__.py
VERSIONS | pygame 2.6.1 | gym 1.0.0 | torch 2.8.0+cpu
Torch threads: 1


In [7]:

from pathlib import Path
import sys

# Resolve the repo root: the nearest directory, starting at the current working
# directory and walking upwards, that contains a 'src' folder.
cwd = Path.cwd()
root = next(
    (candidate for candidate in (cwd, *cwd.parents) if (candidate / "src").exists()),
    # No 'src' found anywhere above: fall back to the parent when the notebook
    # runs from a 'notebooks' folder, otherwise to the working directory itself.
    cwd.parent if cwd.name == "notebooks" else cwd,
)

# Put the root first on sys.path so `import src....` resolves against it.
# Earlier copies are dropped first, so re-running this cell does not pile up
# duplicate entries.
root_str = str(root)
sys.path[:] = [entry for entry in sys.path if entry != root_str]
sys.path.insert(0, root_str)
print(f"Repo root resolved to: {root}")
print("sys.path[0] ->", sys.path[0])

# Optional: verify the expected package exists
expected = root / "src" / "reinforcement_learning" / "lunar_lander"
print("lunar_lander path exists:", expected.exists())


Repo root resolved to: C:\Users\mtll\Documents\Personal Rebranding\ML-Foundations-for-Autonomous-Systems
sys.path[0] -> C:\Users\mtll\Documents\Personal Rebranding\ML-Foundations-for-Autonomous-Systems
lunar_lander path exists: True


In [8]:

# Bring the training (`train`) and evaluation (`play`) entry points into the
# notebook namespace; on failure, print where the files are expected and
# re-raise the original error.
try:
    from src.reinforcement_learning.lunar_lander.train import train
    from src.reinforcement_learning.lunar_lander.eval import play
except Exception:
    for hint_line in (
        "Failed to import train/play. Please check that your files exist:",
        "src/reinforcement_learning/lunar_lander/train.py and eval.py",
    ):
        print(hint_line)
    raise
else:
    print("Imported train/play from src.reinforcement_learning.lunar_lander")


Imported train/play from src.reinforcement_learning.lunar_lander


In [9]:
# Short 50-episode call of train() on CartPole-v1, run before the LunarLander
# training cell further down.
# NOTE(review): presumably a quick smoke test of the training loop — confirm.
# NOTE(review): the result is bound to `returns`, the same name the LunarLander
# cell reassigns and the reward plot reads; if that cell is skipped, the plot
# would show these CartPole returns under a LunarLander title.
returns = train(episodes=50, env_id="CartPole-v1")

[10/50] return=11.0 loss=0.000
[20/50] return=18.0 loss=0.000
[30/50] return=17.0 loss=0.000
[40/50] return=35.0 loss=0.000
[50/50] return=19.0 loss=0.000


## Train the agent

In [10]:

# WARNING: training for many episodes can take time.
# This demo call uses only 30 episodes; raise `episodes` (e.g., to 200–300)
# to produce a more visible reward curve.
# NOTE(review): render_mode=None / save_ckpt=False presumably mean "no rendering,
# no checkpoint written" — confirm against train.py.
returns = train(episodes=30, env_id="LunarLander-v3", render_mode=None, save_ckpt=False)
# Echo the number of returned values and the last five of them.
len(returns), returns[-5:]


[10/30] return=-49.6 loss=-0.000
[20/30] return=-269.9 loss=0.003
[30/30] return=-124.2 loss=0.007


(30,
 [np.float64(-143.35930238677554),
  np.float64(-160.2385500666097),
  np.float64(-265.8496849662847),
  np.float64(-64.75937193197998),
  np.float64(-124.17923316817432)])

In [11]:
from pathlib import Path
from src.reinforcement_learning.lunar_lander.eval import rollout_frames
# Call rollout_frames() for one episode and echo whatever it returns.
# Author's note (translated): uses a random policy if no checkpoint has been
# loaded yet — NOTE(review): not verifiable from this notebook; confirm in eval.py.
gif = rollout_frames(episodes=1)
gif

WindowsPath('C:/Users/mtll/Documents/Personal Rebranding/ML-Foundations-for-Autonomous-Systems/docs/gifs/lunarlander_demo.gif')

## Plot training rewards

In [None]:

import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

# Line plot of per-episode returns from the latest `train` call, using the
# explicit Figure/Axes interface.
fig, ax = plt.subplots()
episode_index = np.arange(len(returns))
ax.plot(episode_index, returns, linewidth=1.0)
ax.set_xlabel("Episode")
ax.set_ylabel("Return")
ax.set_title("LunarLander — REINFORCE (training run)")
fig.tight_layout()
plt.show()


## Display saved figure (if generated by `train.py`)

In [None]:

# Show the reward plot stored under docs/images/ if it exists; otherwise
# report the path that was checked.
img = Path(root).joinpath("docs", "images", "lunarlander_rewards.png")
if not img.exists():
    print("No saved plot found at", img)
else:
    from IPython.display import Image, display

    display(Image(filename=str(img)))



## Evaluate the policy (headless)

This cell calls `play()` from your `eval.py`. If `play()` creates the environment with `render_mode="human"`,
it will try to open a window (which notebooks can't show).  
For a headless preview inside a notebook, you would need a variant of `play()`
that creates the environment with `render_mode="rgb_array"` and returns frames.


In [None]:

# Run evaluation for a few episodes via play(). If play() uses a GUI window,
# consider changing it to 'rgb_array' for notebook previews.
# Failures are reported rather than raised so the rest of the notebook can
# still run, but the exception type and full traceback are shown: the
# render-mode explanation is only a guess, and the bare message alone would
# hide unrelated problems (e.g. a NameError because play was never imported).
try:
    play(episodes=3)
    print("Evaluation run completed.")
except Exception as e:
    import traceback

    print("Evaluation failed (likely due to render mode). Error:")
    print(f"{type(e).__name__}: {e}")
    traceback.print_exc()
