In [None]:
# Sanity check: main.py must already contain the patched progress print (s1/s5).
MAIN = "/content/content/synth/main.py"   # change only if your path is different

# Undecodable bytes are dropped rather than raising, since only two ASCII
# markers are searched for.
with open(MAIN, "r", errors="ignore") as f:
    txt = f.read()

# Collect whichever markers are absent; the check passes only when none are.
missing_markers = [marker for marker in ("s1:", "s5:") if marker not in txt]
assert not missing_markers, "Please apply the s1/s5 patch to main.py first."
print("Yes: main.py has s1/s5 in its progress print.")

In [None]:
#paths
MAIN = "/content/content/synth/main.py"  # keep consistent with the MAIN used elsewhere in this notebook
LOGDIR = "/content/run_logs_full"  # full-run logs are written here

import os, pathlib, shlex, subprocess
# Create the log directory (and any missing parents); a no-op when it already exists.
os.makedirs(LOGDIR, exist_ok=True)

def run_and_log(tag, **kw):
    """
    Run main.py as a subprocess and save its stdout to LOGDIR/{tag}.log.

    Each keyword argument is forwarded on the command line as
    ``--<key> <str(value)>``, in the order given.

    Parameters
    ----------
    tag : str
        Base name of the log file (``{LOGDIR}/{tag}.log``). An existing file
        with the same name is overwritten.
    **kw
        Command-line options for main.py.

    Returns
    -------
    subprocess.CompletedProcess
        The finished process (``stdout``, ``stderr``, ``returncode``), so callers
        can inspect failures. Existing callers that ignore the result are unaffected.

    Notes
    -----
    Output is captured, so nothing is echoed until the process has exited.
    stderr is printed but not written to the log file.
    """
    cmd = ["python", MAIN]
    for k, v in kw.items():
        cmd += [f"--{k}", str(v)]
    # shlex.quote makes the echoed command safe to copy-paste into a shell.
    print("RUN:", " ".join(shlex.quote(c) for c in cmd))
    out = subprocess.run(cmd, text=True, capture_output=True)
    print(out.stdout)
    # Context manager guarantees the log is flushed and closed before returning.
    with open(f"{LOGDIR}/{tag}.log", "w") as log_file:
        log_file.write(out.stdout)
    if out.stderr.strip():
        print("STDERR:\n", out.stderr)
    # Surface failures explicitly: a crashed run may otherwise leave only a
    # truncated log that later parses as if it were complete.
    if out.returncode != 0:
        print(f"WARNING: run '{tag}' exited with return code {out.returncode}")
    return out

In [None]:
# Learning rates passed to main.py via --lr (kept as strings so they reach the CLI verbatim)
LR_GD = "1e-3"
LR_ADAM = "1e-3"

# Iteration budgets for the long runs
LONG_ITERS_GD = 800_000
LONG_ITERS_ADAM = 30_000

# Options shared by every synthetic run; key order is the order of the CLI flags.
BASE_SYN = {
    "method": "DLNN",
    "data": "gaussian",       # noiseless synthetic
    "N": 100,                 # problem size
    "rank": 5,
    "reg_norm": "ratio",
    "initscale": "1e-3",
    "sample_size": 4000,
    "log_interval": 2000,     # fewer log lines -> smaller files
}

In [None]:
# F1 runs: depth 2 and 5 with lam=0, once with GD and once with Adam.
# This sweep was previously disabled by wrapping it in a string literal, which
# hid it from syntax checking and echoed the text as cell output. It is still
# off by default; set the flag to True to launch the runs.
RUN_F1_DEPTH_SWEEP = False

if RUN_F1_DEPTH_SWEEP:
    for depth in (2, 5):
        # GD (solid in plots)
        run_and_log(f"F1_gd_d{depth}_lam0",
            **BASE_SYN, depth=depth, lam="0.0", optim="GD",   lr=LR_GD,   niters=LONG_ITERS_GD)

        # Adam (dashed in plots)
        run_and_log(f"F1_adam_d{depth}_lam0",
            **BASE_SYN, depth=depth, lam="0.0", optim="Adam", lr=LR_ADAM, niters=LONG_ITERS_ADAM)

In [None]:
# F2 runs: depth 3 with lam=1e-4, once with Adam and once with GD.
# This sweep was previously disabled by wrapping it in a string literal, which
# hid it from syntax checking and echoed the text as cell output. It is still
# off by default; set the flag to True to launch the runs.
RUN_F2_DEPTH3 = False

if RUN_F2_DEPTH3:
    for opt, lr, steps in (("Adam", LR_ADAM, LONG_ITERS_ADAM), ("GD", LR_GD, LONG_ITERS_GD)):
        run_and_log(f"F2_{opt.lower()}_d3_lam1e-4",
            **BASE_SYN, depth=3, lam="1e-4", optim=opt, lr=lr, niters=steps)

In [None]:
# F3 runs: depth 1, with and without regularization, GD first and then Adam.
# Each entry is (tag slug, --optim value, learning rate, iteration budget).
f3_optimizers = (
    ("gd",   "GD",   LR_GD,   LONG_ITERS_GD),
    ("adam", "Adam", LR_ADAM, LONG_ITERS_ADAM),
)
for lam in ("0.0", "1e-2"):
    for slug, optim_name, step_size, n_steps in f3_optimizers:
        run_and_log(
            f"F3_{slug}_d1_lam{lam}",
            **BASE_SYN, depth=1, lam=lam, optim=optim_name, lr=step_size, niters=n_steps,
        )

In [None]:
#Fetch ML-100k (safe no-op if present)
import os, urllib.request, zipfile, io, pathlib
ML_DIR = "/content/ml-100k"
if not os.path.exists(f"{ML_DIR}/u.data"):
    print("Downloading MovieLens 100k …")
    # Context managers close the HTTP response and the archive deterministically;
    # the timeout keeps a stalled connection from hanging the cell forever.
    with urllib.request.urlopen(
        "https://files.grouplens.org/datasets/movielens/ml-100k.zip", timeout=60
    ) as response:
        data = response.read()
    # Extract into ML_DIR's parent so the two paths cannot drift apart;
    # the check below expects the archive to yield ML_DIR/u.data.
    with zipfile.ZipFile(io.BytesIO(data)) as archive:
        archive.extractall(os.path.dirname(ML_DIR))
    assert os.path.exists(f"{ML_DIR}/u.data"), "MovieLens 100k not found after download."
print("✔ MovieLens ready.")

# Options shared by the MovieLens runs; key order is the order of the CLI flags.
BASE_ML = {
    "method": "DLNN",
    "data": "ml-100k-sample",
    "depth": 1,
    "optim": "Adam",
    "lr": "5e-4",            # working LR
    "trainprop": "0.8",
    "reg_norm": "ratio",
    "initscale": "1e-3",
    "log_interval": 1000,
}

# λ = 0 vs 1e-3, same iteration budget for both MovieLens runs
for ml_tag, ml_lam in (("ML_lam0_adam_long", "0.0"), ("ML_lam1e-3_adam_long", "1e-3")):
    run_and_log(ml_tag, **BASE_ML, lam=ml_lam, niters=40_000)

In [None]:
import os, re, glob, pandas as pd, numpy as np

# Glob patterns for the logs to parse; add more patterns here to include other directories.
LOG_GLOBS = [os.path.join(LOGDIR, "*.log")]

# De-duplicate across patterns, then sort for a deterministic parse order.
matched_paths = set()
for glob_pattern in LOG_GLOBS:
    matched_paths.update(glob.glob(glob_pattern))
files = sorted(matched_paths)

# One progress line must contain, in this order: depth, iteration, test_RMSE,
# erank, s1 and s5. Each number is captured as a run of digits, sign, dot and
# exponent characters.
pat = re.compile(
    r"depth:\s*(\d+).*?iteration:\s*(\d+).*?"
    r"test_RMSE:\s*([\-0-9\.Ee\+]+).*?erank:\s*([\-0-9\.Ee\+]+).*?"
    r"s1:\s*([\-0-9\.Ee\+]+).*?s5:\s*([\-0-9\.Ee\+]+)"
)

def infer_opt(name):
    """
    Infer the optimizer from a log file name.

    Only the final path component is inspected, so directory names that happen
    to contain "adam" or "gd" cannot affect the result.

    Parameters
    ----------
    name : str
        Log file name or path.

    Returns
    -------
    str
        "adam" if the file name contains "adam" (case-insensitive), else "gd"
        if it contains "gd", else "unk".
    """
    n = os.path.basename(name).lower()
    # "adam" is tested first, so a name containing both substrings counts as Adam.
    if "adam" in n:
        return "adam"
    if "gd" in n:
        return "gd"
    return "unk"

# Parse every matching progress line of every log file into one record.
rows = []
for path in files:
    # Undecodable bytes are dropped so a stray character cannot abort parsing.
    with open(path, "r", errors="ignore") as f:
        for line in f:
            m = pat.search(line)
            if not m: continue  # not a progress line
            d,it,rmse,er,s1,s5 = m.groups()
            rows.append(dict(
                file=os.path.basename(path),
                optimizer=infer_opt(path),
                depth=int(d),
                iteration=int(it),
                test_RMSE=float(rmse),
                erank=float(er),
                s1=float(s1),
                s5=float(s5),
            ))

print(f"Parsed {len(rows)} rows from {len(files)} log files.")
# Check before building the frame: an empty DataFrame has no columns, so
# sort_values would raise an unhelpful KeyError before this message was reached.
if not rows:
    raise RuntimeError("No rows parsed – make sure runs completed and s1/s5 are printed.")

df = pd.DataFrame(rows).sort_values(["optimizer","depth","iteration"]).reset_index(drop=True)
df.tail(10)

In [None]:
import matplotlib.pyplot as plt

def plot_sv_panel(opt_name, title):
    """
    Plot s1 (solid) and s5 (dashed) against iteration for one optimizer.

    One pair of curves is drawn per log file. Several runs can share the same
    optimizer and depth (for example different lam values); drawing them as a
    single series would interleave their points into one zig-zag line.

    Parameters
    ----------
    opt_name : str
        Value of the "optimizer" column to select from the global `df`.
    title : str
        Figure title.

    Returns
    -------
    None
        Shows the figure; does nothing if `df` has no rows for `opt_name`.
    """
    sub = df[df["optimizer"]==opt_name]
    if sub.empty: return
    plt.figure(figsize=(6.2,4.4))
    for d in sorted(sub["depth"].unique()):
        at_depth = sub[sub["depth"]==d]
        runs = sorted(at_depth["file"].unique())
        for run in runs:
            cur = at_depth[at_depth["file"]==run].sort_values("iteration")
            # Add the file name only when needed, so single-run legends keep
            # the short "depth=d (s1)" form.
            suffix = f", {run}" if len(runs) > 1 else ""
            plt.plot(cur["iteration"], cur["s1"], label=f"depth={d}{suffix} (s1)")
            plt.plot(cur["iteration"], cur["s5"], linestyle="--", label=f"depth={d}{suffix} (s5)")
    plt.title(title); plt.xlabel("iteration"); plt.ylabel("magnitude")
    # Legend sits outside the axes so it never covers the curves.
    plt.legend(bbox_to_anchor=(1.02,1), loc="upper left"); plt.tight_layout(); plt.show()

# One singular-value panel per optimizer, GD first.
for optimizer_key, panel_title in (
    ("gd",   "GD: singular values"),
    ("adam", "Adam: singular values"),
):
    plot_sv_panel(optimizer_key, panel_title)

In [None]:
def plot_metric(metric, title):
    """
    Plot one column of the global `df` against iteration, for GD and Adam.

    GD curves are solid and Adam curves dashed. One curve is drawn per log
    file. Several runs can share the same optimizer and depth (for example
    different lam values); drawing them as a single series would interleave
    their points into one zig-zag line.

    Parameters
    ----------
    metric : str
        Column of `df` to put on the y-axis (e.g. "test_RMSE", "erank").
    title : str
        Figure title.
    """
    plt.figure(figsize=(6.6,4.4))
    for opt in ("gd","adam"):
        ls = "-" if opt=="gd" else "--"
        for d in sorted(df["depth"].unique()):
            cur = df[(df.optimizer==opt) & (df.depth==d)]
            if cur.empty: continue
            runs = sorted(cur["file"].unique())
            for run in runs:
                one_run = cur[cur["file"]==run].sort_values("iteration")
                # Add the file name only when needed, so single-run legends
                # keep the short "OPT d=k" form.
                suffix = f" [{run}]" if len(runs) > 1 else ""
                plt.plot(one_run["iteration"], one_run[metric], ls,
                         label=f"{opt.upper()} d={d}{suffix}")
    plt.xlabel("iteration"); plt.ylabel(metric); plt.title(title)
    # Legend sits outside the axes so it never covers the curves.
    plt.legend(bbox_to_anchor=(1.02,1), loc="upper left"); plt.tight_layout(); plt.show()

# One figure per metric: (df column, figure title).
for metric_column, figure_title in (
    ("test_RMSE", "Test RMSE vs iteration"),
    ("erank",     "Effective rank (erank) vs iteration"),
):
    plot_metric(metric_column, figure_title)

In [None]:
# MovieLens rows are recognised by the "ML_" marker in the log file name.
is_movielens = df["file"].str.contains("ML_", na=False)
ml = df[is_movielens]
assert not ml.empty, "No MovieLens rows parsed. Run Cell 6 first."

# Extract λ from the filename for labeling
def lab(name):
    """
    Build a legend label "λ=<value>" from a log file name.

    The value is read from the text following "lam" in the name (e.g.
    "ML_lam1e-3_adam_long.log" -> "λ=1e-3"), so any λ is labelled correctly
    rather than only 1e-3.

    Parameters
    ----------
    name : str
        Log file name containing a "lam<number>" token.

    Returns
    -------
    str
        "λ=<number as written in the name>". Returns "λ=0" when the number is
        zero in any spelling ("0", "0.0") or when no "lam<number>" token is
        found; the latter matches the previous fallback.
    """
    # The pattern is digits, optional fraction, optional exponent. A character
    # class would also swallow the "." of ".log".
    found = re.search(r"lam(\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)", name)
    if found is None or float(found.group(1)) == 0:
        return "λ=0"
    return f"λ={found.group(1)}"

def plot_ml(metric, title):
    """
    Plot `metric` against iteration for the MovieLens runs in the global `ml`.

    One dashed curve is drawn per log file; the legend entry combines the
    fixed "Adam d=1" prefix with the λ label derived from the file name.
    """
    fig, ax = plt.subplots(figsize=(6.6, 4.4))
    for log_name, run_rows in ml.groupby("file"):
        ordered = run_rows.sort_values("iteration")
        ax.plot(ordered["iteration"], ordered[metric], "--",
                label=f"Adam d=1, {lab(log_name)}")
    ax.set_xlabel("iteration")
    ax.set_ylabel(metric)
    ax.set_title(title)
    ax.legend()
    fig.tight_layout()
    plt.show()

# One MovieLens figure per metric: (df column, figure title).
for ml_metric, ml_title in (
    ("test_RMSE", "MovieLens: test_RMSE"),
    ("erank",     "MovieLens: erank"),
):
    plot_ml(ml_metric, ml_title)

# Summary table: the final logged row of each MovieLens run
summary_columns = ["file", "iteration", "test_RMSE", "erank", "s1", "s5"]
ml_ordered = ml.sort_values(["file", "iteration"])
# After sorting, the last row of each file group is its highest iteration.
last = ml_ordered.groupby("file").tail(1)[summary_columns]
print("=== MovieLens last metrics ===")
print(last.to_string(index=False))

In [None]:
# Final logged row of every synthetic run, i.e. every log file not marked "ML_".
# Grouping is per file: several runs can share optimizer and depth (for example
# different lam values), and grouping on those two columns alone would keep only
# one of them.
synthetic_rows = df[~df["file"].str.contains("ML_", na=False)]
last_syn = (synthetic_rows
              .sort_values(["optimizer","depth","file","iteration"])
              .groupby("file").tail(1)
              [["file","optimizer","depth","iteration","test_RMSE","erank","s1","s5"]])
print("=== Last metrics (synthetic, per optimizer, depth & run) ===")
print(last_syn.to_string(index=False))