In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd /content/drive/MyDrive/RL_project
!pip install -e .


/content/drive/.shortcut-targets-by-id/1h0pj4tgXhS4adjv_t9lBWE5rd07JBIgc/RL_project
Obtaining file:///content/drive/.shortcut-targets-by-id/1h0pj4tgXhS4adjv_t9lBWE5rd07JBIgc/RL_project
  Preparing metadata (setup.py) ... [?25l[?25hdone
Installing collected packages: movierec_rl
  Attempting uninstall: movierec_rl
    Found existing installation: movierec_rl 0.1.0
    Uninstalling movierec_rl-0.1.0:
      Successfully uninstalled movierec_rl-0.1.0
  Running setup.py develop for movierec_rl
Successfully installed movierec_rl-0.1.0


In [3]:
%%writefile /content/drive/MyDrive/RL_project/movierec_rl/__init__.py
"""
movierec_rl package marker.
"""
__version__ = "0.1.0"


Overwriting /content/drive/MyDrive/RL_project/movierec_rl/__init__.py


In [4]:
import sys, os, pathlib

# Path to the folder that **contains** the package directory
project_root = pathlib.Path("/content/drive/MyDrive/RL_project")
sys.path.append(str(project_root))

import movierec_rl
print("Package import ✔")

Package import ✔


In [5]:
yaml_text = """

device: cpu
seed: 42
state_dim: 84

# ------------------------------------------------------------------
env:
  max_steps: 30
  reward_mode: "shaped"
  slate_size: 5
  movies_per_genre: 50

# ------------------------------------------------------------------
agent:
  hidden_dim: 256
  batch_size: 64          # ↑ larger mini-batch for stabler updates
  lr: 0.0001
  gamma: 0.99
  epsilon_start: 1.0
  epsilon_end: 0.05
  epsilon_decay_steps: 20000
  tau: 0.01              # ← NEW - Polyak soft-update rate
  target_update_freq: 0  # hard sync no longer used (kept for completeness)

# ------------------------------------------------------------------
train:
  episodes: 500          # ↑ give the agent enough interactions
  checkpoint_every: 50
  log_every: 10
  save_dir: checkpoints/

# ------------------------------------------------------------------
replay_buffer:
  capacity: 10000
  per: true
  alpha: 0.6
  beta_start: 0.4
  beta_end: 1.0
  beta_steps: 100000
  eps_priority: 1e-6

# ------------------------------------------------------------------
eval:
  episodes: 100
  ckpt_file: ep0500.pt    # after the longer training
  csv_out: eval/metrics.csv
  engagement_thresholds: [0.1, 0.2, 0.3, 0.4, 0.5]
  # engagement_threshold: 0.3   # (fallback single value)

# ------------------------------------------------------------------
logging:
  enable: true            # turn off if you don't want TensorBoard
  log_dir: runs/


"""

with open("movierec_rl/config.yaml", "w") as f:
    f.write(yaml_text.strip())


In [None]:
from movierec_rl.train import load_cfg, train_agent
from movierec_rl.evaluate import evaluate

cfg = load_cfg("movierec_rl/config.yaml")
train_agent(cfg)
evaluate(cfg)

📥 Pre-filling buffer with greedy policy...
✅ Buffer pre-filled: 660 transitions
[Ep 010] avg_ret=0.58 avg_loss=0.0029 ε=0.986 watch=0.05 skips=0.53
[Ep 020] avg_ret=0.91 avg_loss=0.0029 ε=0.971 watch=0.05 skips=0.67
[Ep 030] avg_ret=0.52 avg_loss=0.0040 ε=0.957 watch=0.05 skips=0.93
[Ep 040] avg_ret=0.77 avg_loss=0.0069 ε=0.943 watch=0.05 skips=0.73
[Ep 050] avg_ret=1.03 avg_loss=0.0111 ε=0.929 watch=0.06 skips=0.73
[Ep 060] avg_ret=1.15 avg_loss=0.0157 ε=0.914 watch=0.04 skips=0.87
[Ep 070] avg_ret=1.19 avg_loss=0.0181 ε=0.900 watch=0.04 skips=0.80
[Ep 080] avg_ret=0.86 avg_loss=0.0199 ε=0.886 watch=0.03 skips=0.73
[Ep 090] avg_ret=1.04 avg_loss=0.0211 ε=0.872 watch=0.03 skips=1.07
[Ep 100] avg_ret=0.81 avg_loss=0.0205 ε=0.857 watch=0.07 skips=0.80
[Ep 110] avg_ret=0.46 avg_loss=0.0206 ε=0.843 watch=0.03 skips=0.87
[Ep 120] avg_ret=0.38 avg_loss=0.0194 ε=0.829 watch=0.03 skips=0.80
[Ep 130] avg_ret=0.84 avg_loss=0.0181 ε=0.815 watch=0.05 skips=0.87
[Ep 140] avg_ret=0.51 avg_loss=0.017

In [None]:
%load_ext tensorboard
%tensorboard --logdir ./runs


<IPython.core.display.Javascript object>

In [None]:
from tensorboard.backend.event_processing import event_accumulator
import pandas as pd, glob, os, pathlib
from functools import reduce

runs_root = "runs"       # TensorBoard logs root directory
out_dir   = "tb_csv"     # Output directory for CSV files
pathlib.Path(out_dir).mkdir(exist_ok=True)

# --- Collect all event files and build one DataFrame per run+tag ---
dfs = []   # We'll gather all partial tables here

for ev_path in glob.glob(f"{runs_root}/**/events.out.tfevents.*", recursive=True):
    ea = event_accumulator.EventAccumulator(ev_path, size_guidance={"scalars": 0})
    ea.Reload()
    run = os.path.dirname(ev_path).split(os.sep)[-1]   # e.g. 20250610-130713

    # Iterate over all scalar tags
    for tag in ea.Tags()["scalars"]:
        tmp = pd.DataFrame(ea.Scalars(tag))
        # columns: ['wall_time','step','value']
        tmp.rename(columns={"step": "Step", "value": f"{run}/{tag}"}, inplace=True)
        tmp.drop(columns=["wall_time"], inplace=True)
        dfs.append(tmp)

# --- Merge on the "Step" column (outer join on everything present) ---
full = reduce(lambda left, right: pd.merge(left, right, on="Step", how="outer"), dfs)
full.sort_values("Step", inplace=True)          # Sort steps ascending
full.reset_index(drop=True, inplace=True)

# --- Save merged CSVs ---
full_csv = f"{out_dir}/all_runs_merged.csv"
full.to_csv(full_csv, index=False)
print(f"✅ saved merged CSV to: {full_csv}")

# --- Optional: Pivot table (columns = run/tag, rows = Step) w/o NA ---
pivot_csv = f"{out_dir}/all_runs_merged_pivot.csv"
full.fillna(method="ffill", inplace=True)       # Forward-fill missing values
full.to_csv(pivot_csv, index=False)
print(f"✅ saved pivot-style CSV to: {pivot_csv}")

✅ saved merged CSV to: tb_csv/all_runs_merged.csv


  full.fillna(method="ffill", inplace=True)       # Forward-fill missing values


✅ saved pivot-style CSV to: tb_csv/all_runs_merged_pivot.csv


In [None]:
!python -m movierec_rl.evaluate  --config movierec_rl/config.yaml

In [None]:
!python -m movierec_rl.plot
