# Auto-migrate whatever is possible

In [None]:
#!/usr/bin/env python3
"""
migrate_tex.py – convert legacy .tex sources to the new preamble/layout.

Usage (no argparse by design):
    >>> from migrate_tex import run
    >>> run("/path/to/folder", prefix="week_")

It scans *only* that folder (non-recursive) for .tex files whose
basename starts with the given prefix, applies the automatic edits,
leaves the originals commented with “% OLD”, inserts the new code
labelled “% new”, and writes results to  <folder>/auto/<oldname>_auto.tex.

Both console and file logging (migration.log inside the auto folder) show
which files were changed or skipped.
"""

In [8]:

from __future__ import annotations

import logging
import os
import pathlib
import re
from typing import Callable, List, Tuple

# --------------------------------------------------------------------------- #
# 1. Compile all replacement patterns once – each returns (new_text, changed)
# --------------------------------------------------------------------------- #

# (a) \documentclass
_docclass_re = re.compile(
    r"^(\\documentclass\[\s*)11pt,(compress,t,notes=noshow,\s*aspectratio=169,\s*xcolor=table\](\s*\{beamer\}))", re.M
)


def _fix_docclass(text: str) -> Tuple[str, bool]:
    def _sub(m: re.Match[str]) -> str:
        old_line = m.group(0)
        new_line = r"\documentclass[10pt,compress,t,notes=noshow, xcolor=table]{beamer}"
        return f"% OLD\n%{old_line}\n% new\n{new_line}"

    new, n = _docclass_re.subn(_sub, text)
    return new, n > 0


# (b) \usepackage → \input
_usepkg_re = re.compile(r"^(\\)usepackage\{(\.\./\.\./style/)lmu-lecture\}", re.M)


def _fix_usepackage(text: str) -> Tuple[str, bool]:
    def _sub(m: re.Match[str]) -> str:
        old_line = m.group(0)
        new_line = rf"{m.group(1)}input{{{m.group(2)}preamble}}"
        return f"% OLD\n%{old_line}\n% new\n{new_line}"

    new, n = _usepkg_re.subn(_sub, text)
    return new, n > 0


# (c) title/goal/chapter/lecture block → \titlemeta{...}
_block_re = re.compile(
    r"""
    \\newcommand\{\\titlefigure\}\{(?P<path>[^}]*)\}\s*
    \\newcommand\{\\learninggoals\}\{(?P<goals>.*?)\}\s*
    \\lecturechapter\{(?P<title>[^}]*)\}\s*
    \\lecture\{(?P<subtitle>[^}]*)\}
    """,
    re.S | re.X,
)


def _fix_block(text: str) -> Tuple[str, bool]:
    def _sub(m: re.Match[str]) -> str:
        old = m.group(0)
        new_block = (
            "% new\n"
            f"\\titlemeta{{\n{m.group('title')}\n}}{{\n{m.group('subtitle')}\n}}{{\n"
            f"{m.group('path')}\n}}{{\n{m.group('goals')}\n}}\n"
        )
        commented_old = "% OLD\n%" + old.replace("\n", "\n%")
        return f"{commented_old}\n{new_block}"

    new, n = _block_re.subn(_sub, text)
    return new, n >  0


# Transformations applied, in this order, to the text of every file; each
# entry takes the current text and returns ``(new_text, changed)``.
_TRANSFORMS: List[Callable[[str], Tuple[str, bool]]] = [
    _fix_docclass,
    _fix_usepackage,
    _fix_block,
]


# --------------------------------------------------------------------------- #
# 2. Main runner
# --------------------------------------------------------------------------- #
def run(folder: str | os.PathLike[str], prefix: str = "") -> None:
    """Convert all matching .tex files in *folder*; create `<folder>/auto`.

    Parameters
    ----------
    folder : str | os.PathLike[str]
        Directory scanned (non-recursively) for ``<prefix>*.tex`` files.
    prefix : str
        Basename prefix a file must have to be processed; "" matches all.

    Every file changed by at least one entry of ``_TRANSFORMS`` is written to
    ``<folder>/auto/<stem>_auto.tex``; unchanged files are only logged as
    skipped.  Messages go to the console and to
    ``<folder>/auto/migration.log`` (overwritten on every run).
    """
    folder = pathlib.Path(folder).expanduser().resolve()
    auto_dir = folder / "auto"
    auto_dir.mkdir(exist_ok=True)

    # Attach per-run handlers to the named logger instead of calling
    # logging.basicConfig(): basicConfig is a no-op once the root logger has
    # handlers, so a second run() in the same process (e.g. a notebook kernel)
    # would keep writing to the first run's log file.
    log_file = auto_dir / "migration.log"
    logger = logging.getLogger("migrate_tex")
    formatter = logging.Formatter("%(levelname)s | %(message)s")
    handlers = [
        logging.StreamHandler(),
        logging.FileHandler(log_file, encoding="utf-8", mode="w"),
    ]
    saved_level, saved_propagate = logger.level, logger.propagate
    for handler in handlers:
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    # Avoid duplicate console lines when the root logger is configured too.
    logger.propagate = False

    try:
        logger.info("Starting migration in %s (prefix '%s')", folder, prefix)

        # sorted() makes the processing/log order deterministic.
        for tex_path in sorted(folder.glob(f"{prefix}*.tex")):
            if tex_path.is_dir():
                continue  # defensive; a directory could be named "*.tex"
            with tex_path.open(encoding="utf-8") as fh:
                content = fh.read()

            changed = False
            for fn in _TRANSFORMS:
                content, was_changed = fn(content)
                changed |= was_changed

            if not changed:
                logger.info("SKIP %s – no patterns found", tex_path.name)
                continue

            new_name = f"{tex_path.stem}_auto.tex"
            out_path = auto_dir / new_name
            out_path.write_text(content, encoding="utf-8")
            logger.info("WROTE %s", new_name)

        logger.info("Migration finished. See %s for details.", log_file)
    finally:
        # Release the log file and restore the logger for the next run.
        for handler in handlers:
            logger.removeHandler(handler)
            handler.close()
        logger.setLevel(saved_level)
        logger.propagate = saved_propagate



In [9]:
from pathlib import Path

# Folder holding the lecture sources to migrate (relative to the working directory).
folder = Path("slides") / "05_functional-decompositions"
prefix = "slides"  # only .tex files whose basename starts with this are processed

run(folder, prefix)  # converted copies and migration.log end up in <folder>/auto

INFO | Starting migration in \\wsl.localhost\Ubuntu\root\lecture_service_attempt\slides\05_functional-decompositions (prefix 'slides')


INFO | WROTE slides03-fd-h-statistic_auto.tex
INFO | WROTE slides05-fd-other-methods_auto.tex
INFO | WROTE slides04-fd-constraints-and-theory_auto.tex
INFO | WROTE slides02-fd-standard-fANOVA_auto.tex
INFO | WROTE slides01-fd-intro-motivation_auto.tex
INFO | Migration finished. See \\wsl.localhost\Ubuntu\root\lecture_service_attempt\slides\05_functional-decompositions\auto\migration.log for details.


# Count files

In [2]:
#!/usr/bin/env python3
"""
Count files in every sub-directory under a given root and report the total.

Usage:
    Run this cell/script from the directory you want to inspect; it calls
    ``count_files_by_directory()`` with its default root (".").

NOTE(review): no command-line argument is read here, so
``python count_files.py /path/to/root`` would still scan the current
working directory.
"""

from pathlib import Path
import os
import sys
from pprint import pprint


def count_files_by_directory(root: str | os.PathLike = ".") -> tuple[dict[Path, int], int]:
    """
    Walk *root* recursively and tally the visible files of each directory.

    Parameters
    ----------
    root : str | Path
        Directory the walk starts from (``~`` is expanded, the path is resolved).

    Returns
    -------
    dir_counts : dict[Path, int]
        Every visited directory (absolute path) mapped to the number of its
        direct files whose name does not start with ".".  Directories are
        never filtered, so hidden ones such as ``.git`` are listed too.
    total_files : int
        Sum of all per-directory counts.
    """
    base = Path(root).expanduser().resolve()

    # One entry per walked directory, in os.walk order; dot-files are left out.
    per_directory = {
        Path(current): sum(1 for name in names if not name.startswith("."))
        for current, _subdirs, names in os.walk(base)
    }

    return per_directory, sum(per_directory.values())


# No root is passed, so the current working directory is scanned.
counts, total = count_files_by_directory()

pprint(counts)            # one "directory: count" entry per walked directory
print(f"\nTotal files: {total}")


{WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt'): 5,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git'): 7,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/branches'): 0,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/hooks'): 14,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/info'): 1,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/logs'): 1,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/logs/refs'): 0,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/logs/refs/heads'): 1,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/logs/refs/remotes'): 0,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/logs/refs/remotes/origin'): 1,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/objects'): 0,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_servi

In [3]:
#!/usr/bin/env python3
"""
Utility helpers:

- count_files_by_directory(root, include=None, exclude=None)
      → (dict[pathlib.Path, int], int)

- zip_tree(root, zip_path, include=None, exclude=None, *, keep_structure=True)
      → pathlib.Path  # the ZIP file that was written
"""

from pathlib import Path
import os
import zipfile
from typing import Iterable, Sequence


def _normalize_list(
    items: Sequence[str | os.PathLike] | None, root: Path
) -> set[Path] | None:
    """Return absolute Path objects or None if *items* is None/empty."""
    if not items:
        return None
    return {root.joinpath(Path(p).as_posix()).resolve() for p in items}


def _should_skip(path: Path, include: set[Path] | None, exclude: set[Path] | None) -> bool:
    """True if *path* (a directory) must be skipped according to include/exclude."""
    if include is not None:
        # Skip everything NOT under one of the include paths
        return not any(include_path in path.parents or include_path == path for include_path in include)
    if exclude is not None:
        # Skip anything that is (or is inside) an excluded dir
        return any(ex_path in path.parents or ex_path == path for ex_path in exclude)
    return False


def count_files_by_directory(
    root: str | os.PathLike = ".",
    *,
    include: Sequence[str | os.PathLike] | None = None,
    exclude: Sequence[str | os.PathLike] | None = None,
) -> tuple[dict[Path, int], int]:
    """
    Recursively count visible files in *root*, honoring optional include/exclude.

    Parameters
    ----------
    root : str | os.PathLike
        Directory to start walking from.
    include, exclude : sequence of paths relative to *root*, optional
        • If *include* is given, ONLY those paths (and their sub-trees) are considered.
        • If *include* is None but *exclude* is provided, those paths are skipped.
        • Hidden directories (leading dot) are ignored together with everything
          below them; hidden files are ignored as well.

    Returns
    -------
    dir_counts : dict[Path, int]
        Absolute directory path → number of visible files directly inside it.
    total : int
        Sum of all per-directory counts.
    """
    root = Path(root).expanduser().resolve()
    include_set = _normalize_list(include, root)
    exclude_set = _normalize_list(exclude, root)

    dir_counts: dict[Path, int] = {}
    total = 0

    for dirpath, dirnames, filenames in os.walk(root):
        # Prune hidden sub-directories in place so os.walk never descends into
        # them; checking only the current directory's own name would still
        # count e.g. ".git/hooks", whose name is not hidden.
        dirnames[:] = [d for d in dirnames if not d.startswith(".")]

        dir_path = Path(dirpath)

        # Determine if this directory should be processed
        if _should_skip(dir_path, include_set, exclude_set):
            continue

        # After pruning, only the walk root itself can still be hidden here;
        # its own files stay uncounted, as before.
        if dir_path.name.startswith("."):
            continue

        count = sum(1 for f in filenames if not f.startswith("."))
        dir_counts[dir_path] = count
        total += count

    return dir_counts, total


def zip_tree(
    root: str | os.PathLike,
    zip_path: str | os.PathLike,
    include: Sequence[str | os.PathLike] | None = None,
    exclude: Sequence[str | os.PathLike] | None = None,
    *,
    keep_structure: bool = True,
) -> Path:
    """
    Compress *root* into *zip_path* applying the same include/exclude logic.

    Parameters
    ----------
    root : str | os.PathLike
        Directory whose files are archived.
    zip_path : str | os.PathLike
        Archive to create (an existing file is overwritten).
    include, exclude : sequence of paths relative to *root*, optional
        If *include* is given, only those sub-trees are archived; otherwise
        the *exclude* sub-trees are left out.
    keep_structure : bool
        • True  → store each file with its path relative to *root* (default).
        • False → store only basenames (may cause name clashes).

    Returns
    -------
    Path
        Absolute path of the created ZIP archive.

    Hidden files, and hidden directories with everything below them, are
    never archived; neither is the archive file itself.
    """
    root = Path(root).expanduser().resolve()
    zip_path = Path(zip_path).expanduser().resolve()
    include_set = _normalize_list(include, root)
    exclude_set = _normalize_list(exclude, root)

    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for dirpath, dirnames, filenames in os.walk(root):
            # Prune hidden sub-directories in place so their whole sub-tree
            # (e.g. ".git/objects/...") is left out, not just their own files.
            dirnames[:] = [d for d in dirnames if not d.startswith(".")]

            dir_path = Path(dirpath)

            if _should_skip(dir_path, include_set, exclude_set):
                continue
            # Only the walk root itself can still be hidden at this point.
            if dir_path.name.startswith("."):
                continue

            for fname in filenames:
                if fname.startswith("."):
                    continue
                fpath = dir_path / fname
                # The archive may live inside the tree being zipped; never
                # add the half-written archive to itself.
                if fpath == zip_path:
                    continue
                arcname = fpath.relative_to(root) if keep_structure else fpath.name
                zf.write(fpath, arcname)

    return zip_path


# -------- example usage --------
if __name__ == "__main__":
    project_root = Path(".")  # current directory

    # Sub-folders (relative to root) for the counting example below; only the
    # commented-out code uses this list.
    include_dirs = ["src", "tests"]

    # counts, total = count_files_by_directory(project_root, include=include_dirs)
    # print("Per‑directory counts:")
    # for d, c in counts.items():
    #     print(f"{d}: {c}")
    # print(f"TOTAL: {total}")

    # Create the archive from the latex-math, slides and style sub-trees only
    # (an include list is passed; nothing is excluded explicitly).
    archive = zip_tree(
        project_root,
        "to_overleaf.zip",
        include=["latex-math", "slides", "style"],
    )
    print(f"\nCreated ZIP archive at {archive}")



Created ZIP archive at \\wsl.localhost\Ubuntu\root\lecture_service_attempt\to_overleaf.zip


In [4]:
# Number of direct entries of ./style (files and sub-directories alike).
len(os.listdir("style"))

25

In [5]:
# Per-directory visible-file counts for ./style and everything below it.
count_files_by_directory("style")

({WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/style'): 23,
  WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/style/color'): 3,
  WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/style/logos'): 24},
 50)

# Hanging words (lines that consist of a single word)

In [7]:
!uv pip install pdfplumber

[2mUsing Python 3.10.18 environment at: C:\Users\hayk_\.conda\envs\lectures[0m
[2mResolved [1m8 packages[0m [2min 777ms[0m[0m
[36m[1mDownloading[0m[39m pdfminer-six [2m(5.4MiB)[0m
[36m[1mDownloading[0m[39m cryptography [2m(3.2MiB)[0m
[36m[1mDownloading[0m[39m pypdfium2 [2m(2.8MiB)[0m
 [32m[1mDownloading[0m[39m pypdfium2
 [32m[1mDownloading[0m[39m cryptography
 [32m[1mDownloading[0m[39m pdfminer-six
[2mPrepared [1m6 packages[0m [2min 1.04s[0m[0m
[2mInstalled [1m6 packages[0m [2min 682ms[0m[0m
 [32m+[39m [1mcffi[0m[2m==1.17.1[0m
 [32m+[39m [1mcryptography[0m[2m==45.0.5[0m
 [32m+[39m [1mpdfminer-six[0m[2m==20250506[0m
 [32m+[39m [1mpdfplumber[0m[2m==0.11.7[0m
 [32m+[39m [1mpycparser[0m[2m==2.22[0m
 [32m+[39m [1mpypdfium2[0m[2m==4.30.1[0m


In [None]:
import pdfplumber
from pathlib import Path

# Report "hanging words": text lines of a PDF that consist of a single word,
# together with the bounding box of that line.
pdf_path = Path("pdfs") / "02_interp_models_1" / "02_02_lm_simple_after.pdf"
# Raise instead of assert so the check is not stripped under ``python -O``.
if not pdf_path.exists():
    raise FileNotFoundError(f"PDF file not found: {pdf_path}")
results = []

with pdfplumber.open(pdf_path) as pdf:
    for page_num, page in enumerate(pdf.pages, start=1):
        # Very naive header/footer filter: keep only lines that start within
        # the middle 90 % of the page height.
        upper_limit = page.height * 0.05
        lower_limit = page.height * 0.95

        # extract_text_lines() yields every text line with its own position,
        # so the filter and the bbox refer to the line itself rather than to
        # the first word of the page or the first same-text word on the page.
        # NOTE(review): pdfplumber documents this method as experimental —
        # confirm it is available in the pinned version.
        text_lines = page.extract_text_lines(
            x_tolerance=2, y_tolerance=2, return_chars=False
        )
        for line in text_lines:
            if not (upper_limit < line["top"] < lower_limit):
                continue
            print(f"Page {page_num}: {line['text']}")
            words = line["text"].split()
            if len(words) == 1:
                results.append({
                    "page": page_num,
                    "text": words[0],
                    "bbox": (line["x0"], line["top"], line["x1"], line["bottom"]),
                })

# Show summary
for r in results:
    print(f"Page {r['page']}: “{r['text']}” at {r['bbox']}")


Page 1: Interpretable Machine Learning
Page 1: Linear Regression Model
Page 1: 400
Page 1: 300
Page 1: 200
Page 1: 100
Page 1: 0
Page 1: −100
Page 1: 0.0 2.5 5.0 7.5 10.0
Page 1: x
Page 1: y
Page 1: Learning goals
Page 1: LM basics and assumptions
Page 1: Interpretation of main effects in LM
Page 1: What are significant features?
Page 1: “400” at (30.7588364, 149.53615468000024, 36.12052232, 152.75059468000023)
Page 1: “300” at (30.7588364, 161.34743588000023, 36.12052232, 164.56187588000023)
Page 1: “200” at (30.7588364, 173.15514548000021, 36.12052232, 176.36958548000024)
Page 1: “100” at (30.7588364, 184.96642668000024, 36.12052232, 188.18086668000024)
Page 1: “0” at (34.334008, 196.77413628000022, 36.12123664, 199.98857628000025)
Page 1: “−100” at (28.8837464, 208.58541748000025, 36.12266528, 211.79985748000024)
Page 1: “x” at (92.40108079999999, 216.27292932000023, 94.3654608, 220.20168932000024)
Page 1: “y” at (23.92529412, 177.93287480000023, 27.85405412, 179.89725480000024)
