In [4]:
#Before cell 1
import torch

#Report the PyTorch build and, when CUDA is usable, the first GPU's name and the device count.
_cuda_ok = torch.cuda.is_available()
print("Torch version:", torch.__version__)
print("CUDA available:", _cuda_ok)
if _cuda_ok:
    print("GPU name:", torch.cuda.get_device_name(0))
    print("GPU count:", torch.cuda.device_count())

Torch version: 2.7.0+cu128
CUDA available: True
GPU name: NVIDIA GeForce RTX 5080
GPU count: 1


In [6]:
#cell 1
#Confirms main.py prints s1/s5 lines

from pathlib import Path

#CHANGE THIS if needed:
#Point BASE_DIR to the folder that contains the "content" directory.
#If you open VS Code in the project root (where "content" lives),
#Path.cwd() is fine.
BASE_DIR = Path.cwd() #or Path(r"C:\path\to\your\project")

MAIN = BASE_DIR.joinpath("content", "synth", "main.py") #adjust this if your layout is different

#Read the script as text (undecodable bytes are skipped) and look for both progress markers.
txt = MAIN.read_text(errors="ignore")
assert all(marker in txt for marker in ("s1:", "s5:")), "Please apply the s1/s5 patch to main.py first."
print("Yes: main.py has s1/s5 in its progress print.")


Yes: main.py has s1/s5 in its progress print.


In [7]:
#cell 2
#paths

from pathlib import Path
import os, shlex, subprocess, sys

#Project root; keep this identical to the BASE_DIR chosen in Cell 1.
BASE_DIR = Path.cwd() #or Path(r"C:\path\to\your\project")

#Script that gets launched, and the folder that receives one log per run.
MAIN = BASE_DIR.joinpath("content", "synth", "main.py") #adjust if needed
LOGDIR = BASE_DIR.joinpath("run_logs_full") #full-run logs here

#Create the log folder up front; harmless when it already exists.
LOGDIR.mkdir(parents=True, exist_ok=True)

def run_and_log(tag, **kw):
    """
    Runs main.py with given kwargs and writes stdout to LOGDIR/{tag}.log

    Each keyword ``k=v`` is forwarded as the two arguments ``--k`` and
    ``str(v)``, in the order the keywords were given. The child's stdout is
    captured, echoed once the process has finished, and written to the log
    file (overwriting any previous log with the same tag). Non-empty stderr
    is echoed as well.

    A non-zero exit status is reported explicitly, because the log of a
    crashed run is only partial and would otherwise be indistinguishable
    from a finished one. The function does not raise in that case, so a loop
    of runs keeps going. Returns None.
    """
    #Use the same Python interpreter as the notebook
    cmd = [sys.executable, str(MAIN)]
    for k, v in kw.items():
        cmd += [f"--{k}", str(v)]
    #shlex.quote is used only to make the echoed command readable; the
    #process itself is started from the argument list, without a shell.
    print("RUN:", " ".join(shlex.quote(c) for c in cmd))
    out = subprocess.run(cmd, text=True, capture_output=True)
    print(out.stdout)

    log_path = LOGDIR / f"{tag}.log"
    with open(log_path, "w") as f:
        f.write(out.stdout)

    if out.stderr.strip():
        print("STDERR:\n", out.stderr)

    #Surface failures instead of silently leaving a truncated log behind.
    if out.returncode != 0:
        print(f"FAILED: run '{tag}' exited with code {out.returncode}; "
              f"{log_path.name} may be incomplete.")


In [9]:
#cell 3
#Learning rates
#Step sizes, kept as strings because every setting ends up as a command-line argument
LR_GD = "1e-3"
LR_ADAM = "1e-3"

#Iteration budgets per optimizer
LONG_ITERS_GD = 800_000
LONG_ITERS_ADAM = 30_000

#Progress-print spacing per optimizer
LOG_EVERY_ADAM = 1000
LOG_EVERY_GD = 20000 #800k / 20k = 40 points

#Settings common to all synthetic runs; key order is the order of the CLI flags
BASE_SYN = {
    "method": "DLNN",
    "data": "gaussian", #noiseless synthetic (matrix completion)
    "N": 100,
    "rank": 5,
    "reg_norm": "ratio",
    "initscale": "1e-3",
    "sample_size": 2000,
    "seed": 67,
}

In [5]:
#cell 4A - depth with penalty
#ADAM Fixed ONLY
#Depths {2,3,4,5} with fixed λ = 1e-4
#One Adam run per depth, λ held at 1e-4 for the whole run.
for depth in range(2, 6):
    run_and_log(
        f"SYN_adam_d{depth}_lam1e-4_fixed",
        **BASE_SYN,
        depth=depth,
        lam="1e-4",
        optim="Adam",
        lr=LR_ADAM,
        niters=LONG_ITERS_ADAM,
        log_interval=LOG_EVERY_ADAM,
    )

# Fig.3: Adam, depth=1, once without penalty and once with λ = 1e-2
for lam in ["0.0", "1e-2"]:
    run_and_log(
        f"F3_adam_d1_lam{lam}",
        **BASE_SYN,
        depth=1,
        lam=lam,
        optim="Adam",
        lr=LR_ADAM,
        niters=LONG_ITERS_ADAM,
        log_interval=LOG_EVERY_ADAM,
    )


RUN: 'c:\Users\mattw\.conda\envs\mlproj\python.exe' 'c:\Users\mattw\Desktop\experiment\content\synth\main.py' --method DLNN --data gaussian --N 100 --rank 5 --reg_norm ratio --initscale 1e-3 --sample_size 2000 --seed 67 --depth 2 --lam 1e-4 --optim Adam --lr 1e-3 --niters 30000 --log_interval 1000
depth: 2, iteration: 0, test_MSE: 1.0109117031097412, test_RMSE: 1.0054410490475019, erank: 66.73628997802734, s1: 0.0026257652789354324, s5: 0.002133142203092575

depth: 2, iteration: 1000, test_MSE: 0.348835825920105, test_RMSE: 0.5906232520990898, erank: 23.023950576782227, s1: 38.17356491088867, s5: 19.732196807861328

depth: 2, iteration: 2000, test_MSE: 0.3312409818172455, test_RMSE: 0.5755353871112058, erank: 21.826580047607422, s1: 38.79084014892578, s5: 20.106565475463867

depth: 2, iteration: 3000, test_MSE: 0.3031250536441803, test_RMSE: 0.5505679373557638, erank: 20.026620864868164, s1: 39.8005485534668, s5: 20.753402709960938

depth: 2, iteration: 4000, test_MSE: 0.26329895853996

In [6]:
#cell 4B - depth with penalty
#ADAM (cosine only)
#One Adam run per depth 1..5 with the cosine λ schedule requested from main.py.
for depth in range(1, 6):
    run_and_log(
        f"SYN_adam_d{depth}_lam1e-4_cos",
        **BASE_SYN,
        depth=depth,
        lam="1e-4",
        lam_schedule="cosine",
        optim="Adam",
        lr=LR_ADAM,
        niters=LONG_ITERS_ADAM,
        log_interval=LOG_EVERY_ADAM,
    )
    

RUN: 'c:\Users\mattw\.conda\envs\mlproj\python.exe' 'c:\Users\mattw\Desktop\experiment\content\synth\main.py' --method DLNN --data gaussian --N 100 --rank 5 --reg_norm ratio --initscale 1e-3 --sample_size 2000 --seed 67 --depth 1 --lam 1e-4 --lam_schedule cosine --optim Adam --lr 1e-3 --niters 30000 --log_interval 1000
depth: 1, iteration: 0, test_MSE: 1.0109524726867676, test_RMSE: 1.0054613233171963, erank: 80.52301788330078, s1: 0.01973901316523552, s5: 0.017823003232479095

depth: 1, iteration: 1000, test_MSE: 0.9865087270736694, test_RMSE: 0.9932314569493202, erank: 65.4253158569336, s1: 7.805086612701416, s5: 5.830443382263184

depth: 1, iteration: 2000, test_MSE: 0.963529109954834, test_RMSE: 0.9815951863955089, erank: 59.937461853027344, s1: 11.485574722290039, s5: 8.507454872131348

depth: 1, iteration: 3000, test_MSE: 0.9332906603813171, test_RMSE: 0.9660696974759726, erank: 53.878475189208984, s1: 13.799129486083984, s5: 10.046590805053711

depth: 1, iteration: 4000, test_MS

In [None]:
#cell 4C - depth with penalty
#GD Fixed ONLY
# Depths {2, 3,4,5} with fixed λ = 1e-4
#One GD run per depth, λ held at 1e-4 for the whole run.
for depth in range(2, 6):
    run_and_log(
        f"SYN_gd_d{depth}_lam1e-4_fixed",
        **BASE_SYN,
        depth=depth,
        lam="1e-4",
        optim="GD",
        lr=LR_GD,
        niters=LONG_ITERS_GD,
        log_interval=LOG_EVERY_GD,
    )

# Fig.3: GD, depth=1, once without penalty and once with λ = 1e-2
for lam in ["0.0", "1e-2"]:
    run_and_log(
        f"F3_gd_d1_lam{lam}",
        **BASE_SYN,
        depth=1,
        lam=lam,
        optim="GD",
        lr=LR_GD,
        niters=LONG_ITERS_GD,
        log_interval=LOG_EVERY_GD,
    )


RUN: 'c:\Users\mattw\.conda\envs\mlproj\python.exe' 'c:\Users\mattw\Desktop\experiment\content\synth\main.py' --method DLNN --data gaussian --N 100 --rank 5 --reg_norm ratio --initscale 1e-3 --sample_size 2000 --seed 67 --depth 2 --lam 1e-4 --optim GD --lr 1e-3 --niters 800000 --log_interval 20000
depth: 2, iteration: 0, test_MSE: 1.0109117031097412, test_RMSE: 1.0054410490475019, erank: 66.73628997802734, s1: 0.0026257652789354324, s5: 0.002133142203092575

depth: 2, iteration: 20000, test_MSE: 1.0109120607376099, test_RMSE: 1.0054412268937503, erank: 51.75908279418945, s1: 0.004405342508107424, s5: 0.0028854848351329565

depth: 2, iteration: 40000, test_MSE: 1.0109084844589233, test_RMSE: 1.0054394484298512, erank: 38.66973876953125, s1: 0.008032809011638165, s5: 0.0036792350001633167

depth: 2, iteration: 60000, test_MSE: 1.010896921157837, test_RMSE: 1.0054336980417142, erank: 29.3104190826416, s1: 0.014231769368052483, s5: 0.0042870440520346165

depth: 2, iteration: 80000, test_MS

In [8]:
#cell 4D - depth with penalty #comment out depth 1
#GD (cosine only)
#One GD run per depth 2..5 with the cosine λ schedule requested from main.py.
for depth in range(2, 6):
    run_and_log(
        f"SYN_gd_d{depth}_lam1e-4_cos",
        **BASE_SYN,
        depth=depth,
        lam="1e-4",
        lam_schedule="cosine",
        optim="GD",
        lr=LR_GD,
        niters=LONG_ITERS_GD,
        log_interval=LOG_EVERY_GD,
    )


RUN: 'c:\Users\mattw\.conda\envs\mlproj\python.exe' 'c:\Users\mattw\Desktop\experiment\content\synth\main.py' --method DLNN --data gaussian --N 100 --rank 5 --reg_norm ratio --initscale 1e-3 --sample_size 2000 --seed 67 --depth 2 --lam 1e-4 --lam_schedule cosine --optim GD --lr 1e-3 --niters 800000 --log_interval 20000
depth: 2, iteration: 0, test_MSE: 1.0109117031097412, test_RMSE: 1.0054410490475019, erank: 66.73628997802734, s1: 0.0026257652789354324, s5: 0.002133142203092575

depth: 2, iteration: 20000, test_MSE: 1.0109120607376099, test_RMSE: 1.0054412268937503, erank: 51.7667350769043, s1: 0.00440417742356658, s5: 0.002885157009586692

depth: 2, iteration: 40000, test_MSE: 1.010908603668213, test_RMSE: 1.0054395077120317, erank: 38.7194938659668, s1: 0.008015431463718414, s5: 0.003677935805171728

depth: 2, iteration: 60000, test_MSE: 1.010896921157837, test_RMSE: 1.0054336980417142, erank: 29.43391990661621, s1: 0.014148431830108166, s5: 0.004288895521312952

depth: 2, iteration

In [10]:
#cell 5
# Fetch ML-100k (safe no-op if present)
import os, urllib.request, zipfile, io
from pathlib import Path

# Reuse BASE_DIR from Cell 1/2
ML_DIR = BASE_DIR / "ml-100k"

#Download only when the ratings file is missing, so re-running the cell is cheap.
if not (ML_DIR / "u.data").exists():
    print("Downloading MovieLens 100k …")
    #The context managers close the HTTP response and the archive even if a
    #step fails; the timeout stops a stalled connection from blocking the
    #notebook indefinitely.
    with urllib.request.urlopen(
        "https://files.grouplens.org/datasets/movielens/ml-100k.zip",
        timeout=60,
    ) as resp:
        data = resp.read()
    # Extract into the project folder (BASE_DIR)
    with zipfile.ZipFile(io.BytesIO(data)) as archive:
        archive.extractall(BASE_DIR)
    assert (ML_DIR / "u.data").exists(), "MovieLens 100k not found after download."
print("✔ MovieLens ready.")


#Settings shared by the MovieLens runs; key order is the order of the CLI flags.
BASE_ML = {
    "method": "DLNN",
    "data": "ml-100k-sample",
    "depth": 1,
    "optim": "Adam",
    "lr": "5e-4", #working LR
    "trainprop": "0.8",
    "reg_norm": "ratio",
    "initscale": "1e-3",
    "log_interval": 1000,
    "seed": 67,
}

#λ = 0 vs 1e-3
#NOTE: these two runs are disabled by wrapping them in a string literal, so nothing
#is launched here. Because the string is the cell's last expression, the notebook
#echoes it as the cell output. Remove the triple quotes to run them.
'''run_and_log("ML_lam0_adam_long",    **BASE_ML, lam="0.0",  niters=40_000)
run_and_log("ML_lam1e-3_adam_long", **BASE_ML, lam="1e-3", niters=40_000)'''

✔ MovieLens ready.


'run_and_log("ML_lam0_adam_long",    **BASE_ML, lam="0.0",  niters=40_000)\nrun_and_log("ML_lam1e-3_adam_long", **BASE_ML, lam="1e-3", niters=40_000)'

In [10]:
#NOTE: this whole cell is a string literal, so none of the code inside it runs;
#the notebook only echoes the text back as output. Remove the surrounding triple
#quotes to enable it. The BASE_ML it copies is the dict defined in the "#cell 5" cell.
'''#cell 6
# Only if you want to test λ schedule on ML-100k as well.
ML_CFG = dict(**BASE_ML)  # from your Cell 7

run_and_log("ML_lam1e-3_adam_fixed", **ML_CFG, lam="1e-3", niters=40_000)
run_and_log("ML_lam1e-3_adam_cos",   **ML_CFG, lam="1e-3", lam_schedule="cosine", niters=40_000)
'''

'#cell 6\n# Only if you want to test λ schedule on ML-100k as well.\nML_CFG = dict(**BASE_ML)  # from your Cell 7\n\nrun_and_log("ML_lam1e-3_adam_fixed", **ML_CFG, lam="1e-3", niters=40_000)\nrun_and_log("ML_lam1e-3_adam_cos",   **ML_CFG, lam="1e-3", lam_schedule="cosine", niters=40_000)\n'

In [11]:
#Cell 7 - Parse logs (also captures test_MSE)
import os, re, glob, pandas as pd, numpy as np

#Glob patterns to scan; the union of their matches is de-duplicated and sorted.
LOG_GLOBS = [os.path.join(LOGDIR, "*.log")]
files = sorted(set().union(*(glob.glob(g) for g in LOG_GLOBS)))

#One numeric field: either a (signed) nan/inf token or the usual run of digits,
#sign, dot and exponent characters. The nan/inf alternative is tried first so
#that "-inf" is not cut short after its sign. Without it, any progress line
#containing nan/inf fails to match and the whole row is silently dropped.
_NUM = r"([\-+]?(?i:nan|inf)|[\-0-9\.Ee\+]+)"

#Progress-line pattern; groups are depth, iteration, test_MSE, test_RMSE, erank, s1, s5.
pat = re.compile(
    r"depth:\s*(\d+).*?iteration:\s*(\d+).*?"
    rf"test_MSE:\s*{_NUM}.*?test_RMSE:\s*{_NUM}.*?"
    rf"erank:\s*{_NUM}.*?s1:\s*{_NUM}.*?s5:\s*{_NUM}"
)

def infer_opt(name):
    """
    Guess the optimizer from a log file name or path.

    Only the last path component is inspected (both "/" and backslash count
    as separators), so a directory whose name happens to contain "adam" or
    "gd" cannot mislabel the run. Matching is case-insensitive.

    Returns "adam" if the file name contains "adam", otherwise "gd" if it
    contains "gd", otherwise "unk".
    """
    n = str(name).replace("\\", "/").rsplit("/", 1)[-1].lower()
    if "adam" in n: return "adam"
    if "gd"   in n: return "gd"
    return "unk"

#Collect one record per progress line that matches `pat`, across all log files.
rows = []
for path in files:
    fname = os.path.basename(path)
    with open(path, "r", errors="ignore") as f:
        for line in f:
            m = pat.search(line)
            if not m:
                continue
            d,it,mse,rmse,er,s1,s5 = m.groups()
            rows.append(dict(
                file=fname,
                #Infer from the file name only, never from the containing directories.
                optimizer=infer_opt(fname),
                depth=int(d),
                iteration=int(it),
                test_MSE=float(mse),
                test_RMSE=float(rmse),
                erank=float(er),
                s1=float(s1),
                s5=float(s5),
            ))

#Check for "nothing parsed" before sorting: a frame built from an empty list has
#no columns, so sorting it first would raise KeyError instead of the message below.
df = pd.DataFrame(rows)
print(f"Parsed {len(df)} rows from {len(files)} log files.")
if df.empty:
    raise RuntimeError("No rows parsed – make sure runs completed and main.py prints test_MSE/test_RMSE/s1/s5.")
df = df.sort_values(["optimizer","depth","iteration"]).reset_index(drop=True)
df.tail(10)


Parsed 730 rows from 21 log files.


Unnamed: 0,file,optimizer,depth,iteration,test_MSE,test_RMSE,erank,s1,s5
720,SYN_gd_d5_lam1e-4_cos.log,gd,5,700000,1.01089,1.00543,10.262406,0.066279,0.002848
721,SYN_gd_d5_lam1e-4_fixed.log,gd,5,700000,1.010833,1.005402,4.111128,0.163723,0.002526
722,SYN_gd_d5_lam1e-4_cos.log,gd,5,720000,1.010884,1.005427,9.397536,0.073433,0.002882
723,SYN_gd_d5_lam1e-4_fixed.log,gd,5,720000,1.010796,1.005384,3.463624,0.202492,0.002548
724,SYN_gd_d5_lam1e-4_cos.log,gd,5,740000,1.010877,1.005424,8.515001,0.082254,0.002916
725,SYN_gd_d5_lam1e-4_fixed.log,gd,5,740000,1.010733,1.005352,2.867002,0.261485,0.002571
726,SYN_gd_d5_lam1e-4_cos.log,gd,5,760000,1.010867,1.005419,7.619923,0.093366,0.002952
727,SYN_gd_d5_lam1e-4_fixed.log,gd,5,760000,1.010614,1.005293,2.329941,0.359824,0.002597
728,SYN_gd_d5_lam1e-4_cos.log,gd,5,780000,1.010853,1.005412,6.719895,0.107721,0.002989
729,SYN_gd_d5_lam1e-4_fixed.log,gd,5,780000,1.010357,1.005165,1.861857,0.547981,0.002625


In [12]:
#Cell 8 - Re-scan logs (captures test_MSE too)
import os, re, glob, pandas as pd, numpy as np

#Glob patterns to scan; the union of their matches is de-duplicated and sorted.
LOG_GLOBS = [os.path.join(LOGDIR, "*.log")]
files = sorted(set().union(*(glob.glob(g) for g in LOG_GLOBS)))

#One numeric field: either a (signed) nan/inf token or the usual run of digits,
#sign, dot and exponent characters. The nan/inf alternative is tried first so
#that "-inf" is not cut short after its sign. Without it, any progress line
#containing nan/inf fails to match and the whole row is silently dropped.
_NUM = r"([\-+]?(?i:nan|inf)|[\-0-9\.Ee\+]+)"

#Progress-line pattern; groups are depth, iteration, test_MSE, test_RMSE, erank, s1, s5.
pat = re.compile(
    r"depth:\s*(\d+).*?iteration:\s*(\d+).*?"
    rf"test_MSE:\s*{_NUM}.*?test_RMSE:\s*{_NUM}.*?"
    rf"erank:\s*{_NUM}.*?s1:\s*{_NUM}.*?s5:\s*{_NUM}"
)

def infer_opt(name):
    """
    Guess the optimizer from a log file name or path.

    Only the last path component is inspected (both "/" and backslash count
    as separators), so a directory whose name happens to contain "adam" or
    "gd" cannot mislabel the run. Matching is case-insensitive.

    Returns "adam" if the file name contains "adam", otherwise "gd" if it
    contains "gd", otherwise "unk".
    """
    n = str(name).replace("\\", "/").rsplit("/", 1)[-1].lower()
    if "adam" in n: return "adam"
    if "gd"   in n: return "gd"
    return "unk"

#Collect one record per progress line that matches `pat`, keeping the full path as well.
rows = []
for path in files:
    fname = os.path.basename(path)
    with open(path, "r", errors="ignore") as f:
        for line in f:
            m = pat.search(line)
            if not m:
                continue
            d,it,mse,rmse,er,s1,s5 = m.groups()
            rows.append(dict(
                file=fname,
                fullpath=path,
                #Infer from the file name only, never from the containing directories.
                optimizer=infer_opt(fname),
                depth=int(d),
                iteration=int(it),
                test_MSE=float(mse),
                test_RMSE=float(rmse),
                erank=float(er),
                s1=float(s1),
                s5=float(s5),
            ))

#Check for "nothing parsed" before sorting: a frame built from an empty list has
#no columns, so sorting it first would raise KeyError instead of the message below.
df = pd.DataFrame(rows)
print(f"Parsed {len(df)} rows from {len(files)} log files.")
if df.empty:
    raise RuntimeError("No rows parsed — ensure runs completed and logs contain test_MSE/test_RMSE/s1/s5.")
df = df.sort_values(["file","iteration"]).reset_index(drop=True)
df.tail(10)


Parsed 730 rows from 21 log files.


Unnamed: 0,file,fullpath,optimizer,depth,iteration,test_MSE,test_RMSE,erank,s1,s5
720,SYN_gd_d5_lam1e-4_fixed.log,c:\Users\mattw\Desktop\experiment\run_logs_ful...,gd,5,600000,1.010896,1.005433,7.890656,0.079443,0.002447
721,SYN_gd_d5_lam1e-4_fixed.log,c:\Users\mattw\Desktop\experiment\run_logs_ful...,gd,5,620000,1.010891,1.005431,7.075947,0.089207,0.00246
722,SYN_gd_d5_lam1e-4_fixed.log,c:\Users\mattw\Desktop\experiment\run_logs_ful...,gd,5,640000,1.010883,1.005427,6.288095,0.101268,0.002474
723,SYN_gd_d5_lam1e-4_fixed.log,c:\Users\mattw\Desktop\experiment\run_logs_ful...,gd,5,660000,1.010872,1.005422,5.528883,0.116555,0.002489
724,SYN_gd_d5_lam1e-4_fixed.log,c:\Users\mattw\Desktop\experiment\run_logs_ful...,gd,5,680000,1.010857,1.005414,4.801526,0.136555,0.002507
725,SYN_gd_d5_lam1e-4_fixed.log,c:\Users\mattw\Desktop\experiment\run_logs_ful...,gd,5,700000,1.010833,1.005402,4.111128,0.163723,0.002526
726,SYN_gd_d5_lam1e-4_fixed.log,c:\Users\mattw\Desktop\experiment\run_logs_ful...,gd,5,720000,1.010796,1.005384,3.463624,0.202492,0.002548
727,SYN_gd_d5_lam1e-4_fixed.log,c:\Users\mattw\Desktop\experiment\run_logs_ful...,gd,5,740000,1.010733,1.005352,2.867002,0.261485,0.002571
728,SYN_gd_d5_lam1e-4_fixed.log,c:\Users\mattw\Desktop\experiment\run_logs_ful...,gd,5,760000,1.010614,1.005293,2.329941,0.359824,0.002597
729,SYN_gd_d5_lam1e-4_fixed.log,c:\Users\mattw\Desktop\experiment\run_logs_ful...,gd,5,780000,1.010357,1.005165,1.861857,0.547981,0.002625


In [None]:
# Cell 9 - Fig.3 (depth=1): λ ∈ {0, 1e−2}; per-optimizer plots for MSE, erank, s1, s5
import re
import numpy as np
import matplotlib.pyplot as plt

#File-name pattern of the Fig.3 runs; group 1 is the optimizer, group 2 the λ string.
pat = r"^F3_(adam|gd)_d1_lam([0-9eE\.\-\+]+)(?:\.log)?$"

#Depth-1 rows that come from an F3_* log, ordered per file by iteration.
sub = (
    df.loc[(df["depth"] == 1) & df["file"].str.match(pat, na=False)]
      .copy()
      .sort_values(["file","iteration"])
)

if not sub.empty:
    #Split each file name into its optimizer and λ parts.
    parsed = sub["file"].str.extract(pat)
    sub["opt2"]    = parsed[0].str.lower() #adam or gd
    sub["lam_str"] = parsed[1]

    def lam_label(s: str) -> str:
        """Legend text for a λ string: any spelling of zero becomes "0", and "e-0" is shortened to "e-"."""
        text = s.strip()
        if text in ("0", "0.0", "0.00"):
            return "0"
        return text.replace("e-0", "e-")
    sub["lam_lbl"] = sub["lam_str"].map(lam_label)

    def _finite_xy(g, ycol):
        """Iterations and `ycol` values of `g`, ordered by iteration, with non-finite y entries removed."""
        ordered = g.sort_values("iteration")
        ys = ordered[ycol].to_numpy()
        keep = np.isfinite(ys)
        return ordered["iteration"].to_numpy()[keep], ys[keep]

    def plot_one_optimizer(opt_name: str):
        """Show four figures (test MSE, effective rank, s1, s5) for one optimizer, one curve per λ label."""
        gopt = sub[sub["opt2"] == opt_name].copy()
        if gopt.empty:
            print(f"[Fig.3] No rows for optimizer: {opt_name.upper()}")
            return

        tag = opt_name.upper()
        #(column, title, y-label, log-scale y with a 1e-12 floor?, skip curves with no finite point?)
        panels = (
            ("test_MSE", f"Fig.3 - Depth=1, Test MSE ({tag})",       "test_MSE (log scale)", True,  False),
            ("erank",    f"Fig.3 - Depth=1, Effective Rank ({tag})", "effective rank",       False, False),
            ("s1",       f"Fig.3 - Depth=1, s1 ({tag})",             "s1",                   False, False),
            ("s5",       f"Fig.3 - Depth=1, s5 ({tag})",             "s5",                   False, True),
        )
        for ycol, title, ylabel, log_y, skip_empty in panels:
            plt.figure(figsize=(7.0, 4.4))
            for lam, g in gopt.groupby("lam_lbl"):
                x, y = _finite_xy(g, ycol)
                if skip_empty and len(x) == 0:
                    continue  #nothing finite to draw for this λ
                if log_y:
                    plt.semilogy(x, np.maximum(y, 1e-12), label=f"λ={lam}")
                else:
                    plt.plot(x, y, label=f"λ={lam}")
            plt.title(title)
            plt.xlabel("iteration")
            plt.ylabel(ylabel)
            plt.legend()
            plt.tight_layout()
            plt.show()

    #Make the eight plots: Adam (4) then GD (4)
    plot_one_optimizer("adam")
    plot_one_optimizer("gd")
else:
    print("[Fig.3] No rows found for depth=1 F3_* runs. "
          "Expected files like F3_adam_d1_lam0.0.log and F3_gd_d1_lam1e-2.log")


: 

In [None]:
# Cell 10 — depths={2,3,4,5}, four separate charts: GD-fixed, GD-cos, Adam-fixed, Adam-cos
import matplotlib.pyplot as plt
import numpy as np

def _sched_tag_ok(fname: str, tag: str) -> bool:
    """Return True iff filename contains the schedule tag."""
    f = (fname or "").lower()
    if tag == "fixed":
        return "fixed" in f
    if tag == "cos":
        return "cos" in f
    return False

def plot_mse_for(df, opt_name: str, sched_tag: str, depths=(2,3,4,5)):
    """
    Show one log-scale Test MSE figure for an optimizer/schedule pair.

    Rows are selected from `df` where the optimizer equals `opt_name`
    (case-insensitive), the depth is in `depths` and the file name contains
    "lam1e-4"; they are then narrowed to file names accepted by
    `_sched_tag_ok(file, sched_tag)`. One curve is drawn per log file,
    labelled with that file's depth. When either selection is empty a
    message is printed and nothing is plotted. Returns None.
    """
    wanted_opt = opt_name.lower()
    selected = df[
        (df["optimizer"].str.lower() == wanted_opt)
        & df["depth"].isin(depths)
        & df["file"].str.contains("lam1e-4", na=False)
    ]
    sub = selected.copy()
    if sub.empty:
        print(f"[{opt_name}/{sched_tag}] No rows for λ=1e-4.")
        return

    # Narrow down to the files that carry the requested schedule tag
    keep = [_sched_tag_ok(fn, sched_tag) for fn in sub["file"]]
    sub = sub[keep]
    if len(sub) == 0:
        print(f"[{opt_name}/{sched_tag}] No files matching schedule tag '{sched_tag}'.")
        return

    # Stable colour per depth: C0, C1, ... in increasing depth order
    depth_colors = {}
    for i, d in enumerate(sorted(set(depths))):
        depth_colors[d] = f"C{i}"

    plt.figure(figsize=(7.2, 4.6))
    # every file is one run; draw it as a single curve
    for file_name, run in sub.groupby("file"):
        run = run.sort_values("iteration")
        d = int(run["depth"].iloc[0])
        if d in depths:
            # floor at 1e-12 so the log axis never receives a non-positive value
            mse = np.maximum(run["test_MSE"].to_numpy(), 1e-12)
            plt.semilogy(run["iteration"], mse,
                         color=depth_colors.get(d, "C0"),
                         label=f"d={d}")

    opt_label = opt_name.upper()
    if sched_tag == "fixed":
        sched_full = "fixed λ"
    else:
        sched_full = "cosine λ decay"
    plt.title(f"Test MSE — {opt_label} ({sched_full}), λ₀=1e−4")
    plt.xlabel("iteration")
    plt.ylabel("test_MSE (log scale)")
    plt.legend(title="depth", loc="upper left", bbox_to_anchor=(1.02,1.0))
    plt.tight_layout()
    plt.show()

# Four charts, in this order: GD fixed, GD cos, Adam fixed, Adam cos
for _opt, _sched in (("gd", "fixed"), ("gd", "cos"), ("adam", "fixed"), ("adam", "cos")):
    plot_mse_for(df, _opt, _sched, depths=(2,3,4,5))


In [None]:
# Cell 10.5 — depths={2,3,4,5}: four separate charts for singular values (s1 & s5)
# Charts: GD-fixed, GD-cos, Adam-fixed, Adam-cos
import matplotlib.pyplot as plt
import numpy as np

def _sched_tag_ok(fname: str, tag: str) -> bool:
    f = (fname or "").lower()
    return (tag == "fixed" and "fixed" in f) or (tag == "cos" and "cos" in f)

def _finite_xy(g, col):
    x = g["iteration"].to_numpy()
    y = g[col].to_numpy()
    m = np.isfinite(x) & np.isfinite(y)
    return x[m], y[m]

def plot_sv_for(df, opt_name: str, sched_tag: str, depths=(2,3,4,5)):
    """
    Show one figure with the s1 and s5 curves for an optimizer/schedule pair.

    Rows are selected from `df` where the optimizer equals `opt_name`
    (case-insensitive), the depth is in `depths` and the file name contains
    "lam1e-4"; they are then narrowed to file names accepted by
    `_sched_tag_ok(file, sched_tag)`. For every log file, s1 is drawn solid
    and s5 dashed in that depth's colour; a curve with no finite points is
    left out. When either selection is empty a message is printed and nothing
    is plotted. Returns None.
    """
    wanted_opt = opt_name.lower()
    selected = df[
        (df["optimizer"].str.lower() == wanted_opt)
        & df["depth"].isin(depths)
        & df["file"].str.contains("lam1e-4", na=False)
    ]
    sub = selected.copy()
    if sub.empty:
        print(f"[{opt_name}/{sched_tag}] No rows for λ=1e-4.")
        return

    keep = [_sched_tag_ok(fn, sched_tag) for fn in sub["file"]]
    sub = sub[keep]
    if len(sub) == 0:
        print(f"[{opt_name}/{sched_tag}] No files matching schedule tag '{sched_tag}'.")
        return

    # Stable colour per depth: C0, C1, ... in increasing depth order
    depth_colors = {}
    for i, d in enumerate(sorted(set(depths))):
        depth_colors[d] = f"C{i}"

    plt.figure(figsize=(7.6, 4.8))
    for file_name, run in sub.groupby("file"):
        run = run.sort_values("iteration")
        d = int(run["depth"].iloc[0])
        if d not in depths:
            continue
        colour = depth_colors.get(d, "C0")

        # s1 solid first, then s5 dashed, so the legend order stays s1, s5 per depth
        for col, dash in (("s1", "-"), ("s5", "--")):
            xs, ys = _finite_xy(run, col)
            if len(xs):
                plt.plot(xs, ys, color=colour, linestyle=dash,
                         label=f"d={d} • {col}")

    opt_label = opt_name.upper()
    if sched_tag == "fixed":
        sched_full = "fixed λ"
    else:
        sched_full = "cosine λ decay"
    plt.title(f"Singular values (s1 & s5) — {opt_label} ({sched_full}), λ₀=1e−4")
    plt.xlabel("iteration")
    plt.ylabel("singular value")
    plt.legend(ncol=2, loc="upper left", bbox_to_anchor=(1.02, 1.0), title="depth/value")
    plt.tight_layout()
    plt.show()

# Four charts, in this order: GD fixed, GD cos, Adam fixed, Adam cos
for _opt, _sched in (("gd", "fixed"), ("gd", "cos"), ("adam", "fixed"), ("adam", "cos")):
    plot_sv_for(df, _opt, _sched, depths=(2,3,4,5))


In [None]:
#cell 11
# Compute time-to-erank ≤ r* + ε and min test_RMSE
TARGET_R = 5.0    # synthetic true rank
EPS      = 0.5    # tolerance; tweak if needed

def time_to_rank(group, target=TARGET_R, eps=EPS):
    """
    Earliest logged iteration whose effective rank is at most `target + eps`.

    `group` needs "iteration" and "erank" columns. Returns the iteration as
    an int, or None when no row reaches the threshold.
    """
    threshold = target + eps
    reached = group.loc[group["erank"] <= threshold, "iteration"]
    if reached.empty:
        return None
    return int(reached.min())

def best_rmse(group):
    """Smallest value in the "test_RMSE" column of `group`, returned as a plain float."""
    lowest = group["test_RMSE"].min()
    return float(lowest)

# Adam synthetic runs only; one summary row per log file, visited depth by depth
adam_runs = df[df.file.str.contains("SYN_adam_d", na=False)]
summary_rows = []
for depth in (1, 2, 3, 4, 5):
    for key, g in adam_runs[adam_runs.depth == depth].groupby("file"):
        # the schedule is read off the file name
        if "cos" in key.lower():
            sched = "cosine"
        else:
            sched = "fixed"
        summary_rows.append({
            "depth": depth,
            "schedule": sched,
            "time_to_rank": time_to_rank(g),
            "best_test_RMSE": best_rmse(g),
            "file": key,
        })

summary = pd.DataFrame(summary_rows).sort_values(["depth","schedule"])
print("=== Time-to-rank and Best RMSE (Synthetic, Adam) ===")
print(summary[["depth","schedule","time_to_rank","best_test_RMSE","file"]].to_string(index=False))

# Quick winner verdict per depth
print("\n=== Verdict per depth ===")
for depth, g in summary.groupby("depth"):
    # Prefer lower time_to_rank; tie-break by best RMSE.
    # A run that never reached the target rank was recorded as None, which a
    # DataFrame column may hold as NaN; coerce to numbers and rank those runs
    # last by treating them as +inf. Sorting on two columns replaces the
    # earlier idxmin over a Series of tuples, which pandas does not support.
    hit_iter = pd.to_numeric(g["time_to_rank"], errors="coerce")
    ranked = (
        g.assign(_sort_time=hit_iter.fillna(float("inf")))
         .sort_values(["_sort_time", "best_test_RMSE"])
    )
    winner = ranked.iloc[0]
    # Report the real hit iteration, or None when the target was never reached
    t_hit = None if winner["_sort_time"] == float("inf") else int(winner["_sort_time"])
    print(f"depth={depth}: WINNER → {winner['schedule']} (time_to_rank={t_hit}, best_RMSE={winner['best_test_RMSE']:.6f})")


In [None]:
#NOTE: this whole cell is a string literal, so none of the MovieLens plotting or
#summary code inside it runs. Remove the surrounding triple quotes to enable it;
#it reads `df` and expects rows from log files whose names contain "ML_".
'''#cell 12
ml = df[df["file"].str.contains("ML_", na=False)]
assert not ml.empty, "No MovieLens rows parsed. Run Cell 6 first."

#Extract λ from filename for labeling
def lab(name):
    return "λ=1e-3" if "lam1e-3" in name else "λ=0"

def plot_ml(metric, title):
    plt.figure(figsize=(6.6,4.4))
    for key, g in ml.groupby("file"):
        g = g.sort_values("iteration")
        plt.plot(g["iteration"], g[metric], "--", label=f"Adam d=1, {lab(key)}")
    plt.xlabel("iteration"); plt.ylabel(metric); plt.title(title)
    plt.legend(); plt.tight_layout(); plt.show()

plot_ml("test_RMSE", "MovieLens: test_RMSE")
plot_ml("erank",     "MovieLens: erank")

#summary table
last = (ml.sort_values(["file","iteration"])
          .groupby("file").tail(1)
          [["file","iteration","test_RMSE","erank","s1","s5"]])
print("=== MovieLens last metrics ===")
print(last.to_string(index=False))'''

In [None]:
#cell 13
#Final logged row of every synthetic run. Several log files share the same
#(optimizer, depth) pair (fixed λ, cosine λ, and the F3_* runs at depth 1), so
#rows are grouped per file; grouping by (optimizer, depth) alone would keep an
#arbitrary one of those runs. The file name is included to identify each row.
last_syn = (df[~df["file"].str.contains("ML_", na=False)]
              .sort_values(["optimizer","depth","file","iteration"])
              .groupby("file").tail(1)
              [["optimizer","depth","file","iteration","test_RMSE","erank","s1","s5"]])
print("=== Last metrics (synthetic, per optimizer, depth & run) ===")
print(last_syn.to_string(index=False))

In [None]:
#cell 14
# Hypothesis scoreboard for Adam & GD (depth 1-5; λ=1e-4)
import pandas as pd

def time_to_rank(g, r_star=5.0, eps=0.5):
    """
    Earliest logged iteration whose effective rank is at most `r_star + eps`.

    `g` needs "iteration" and "erank" columns. Returns the iteration as an
    int, or None when no row reaches the threshold.
    """
    ordered = g.sort_values("iteration")
    for it, er in zip(ordered["iteration"], ordered["erank"]):
        if er <= (r_star + eps):
            return int(it)
    return None

#File-name tag of each λ schedule, mapped to the label shown in the table
schedules = (("fixed", "fixed"), ("cos", "cosine"))

rows = []
for opt in ("adam","gd"):
    for depth in (1, 2, 3, 4, 5):
        same_opt_depth = (df.optimizer == opt) & (df.depth == depth)
        for tag, label in schedules:
            has_tag = df.file.str.contains(f"lam1e-4_{tag}", na=False)
            sub = df[same_opt_depth & has_tag].sort_values(["file","iteration"])
            if sub.empty:
                continue
            rows.append({
                "optimizer": opt,
                "depth": depth,
                "schedule": label,
                "time_to_rank": time_to_rank(sub, r_star=5.0, eps=0.5),
                "best_test_RMSE": sub["test_RMSE"].min(),
                "file": sub["file"].iloc[0],
            })

res = pd.DataFrame(rows).sort_values(["optimizer","depth","schedule"])
print("=== Time-to-rank and Best RMSE (Synthetic, λ=1e-4) ===")
print(res.to_string(index=False))

print("\n=== Verdict per (optimizer, depth) ===")
for (opt, d), g in res.groupby(["optimizer","depth"]):
    #Rank by earliest hit, then by best RMSE. Runs that never reached the
    #target rank sort last via a helper column, so no placeholder value is
    #written into time_to_rank or printed as if it were a real iteration.
    hit_iter = pd.to_numeric(g["time_to_rank"], errors="coerce")
    g2 = g.assign(_sort_time=hit_iter.fillna(float("inf")))
    win = g2.sort_values(["_sort_time","best_test_RMSE"]).iloc[0]
    #Show the real hit iteration, or None when the target was never reached
    t_hit = None if win["_sort_time"] == float("inf") else int(win["_sort_time"])
    print(f"{opt.upper()}, d={d}: WINNER → {win['schedule']} "
          f"(time_to_rank={t_hit}, best_RMSE={win['best_test_RMSE']:.6f})")
