https://chatgpt.com/share/e/68752101-7ddc-8012-911b-5dfa825e7b79

Objective: Improve text layout by preventing isolated words from appearing alone on a new line (e.g., due to line wrapping). To achieve this:

Try to shorten the sentence while preserving the original meaning.

Use common abbreviations or reformulate phrases if that helps reduce length.

If the sentence cannot be shortened meaningfully, suggest a version that is slightly longer, so more words are pushed to the next line—minimizing the isolation issue.

Instructions:

Always preserve the semantic meaning and tone.

If abbreviations are used, ensure they are commonly understood or intuitive in context.

Output both the shortened version (if successful) and, if not possible, a longer alternative that balances the layout better.

<INSTRUCTION>
1. Always suggest at least 2 variants.
2. Don't explain yourself

In [None]:
from pathlib import Path

FOLDER = Path("slides") / "08_regional-effects"
PREFIX = "slides"
INCLUDE_COMMENTS = False

TITLE_MAIN = "Regional Effects"

assert FOLDER.exists(), f"Folder {FOLDER} does not exist. Dirs: \n{list(FOLDER.parent.iterdir())}"

if not INCLUDE_COMMENTS:
    print("Running without comments in the output .tex files.")

Running without comments in the output .tex files.


# Auto migrate whatever possible

## Create bibtex

In [64]:
#!/usr/bin/env python3
"""
extract_citebutton_to_bib.py – build *fresh* `references.bib` from custom
\citebutton occurrences in a folder’s .tex sources.

Key behaviour
-------------
* **Non‑recursive** scan of `.tex` files whose basename begins with an optional
    prefix.  The prefix is removed in the comment banner that precedes each
    group of references.
* Creates (or overwrites) `auto/references.bib` in the same folder.
* Deduplicates keys **within the current run** so the output never contains
    duplicates, even if different files cite the same link.
* Each BibTeX entry now contains the fields you requested:

        % \citebutton{<...original...>}
        @article{KEY,
                author = {<guessed author>},
                title  = {<first argument of \citebutton>},
                year   = {YYYY},
                url    = {<second argument>}
        }

    Author is heuristically extracted from the label text (the first capitalized
    token preceding a 4‑digit year, or the first capitalized word if no year is
    present).  You can refine `_guess_author()` if needed.
* The script logs **which file it is scanning** and lists all reference keys
    extracted from that file.  Logs go to both console and
    `auto/references_build.log`.
"""
from __future__ import annotations

import logging
import os
import pathlib
import re
from typing import List, Set

# --------------------------------------------------------------------------- #
# 1. Regex helpers                                                            #
# --------------------------------------------------------------------------- #
_CITE_RE = re.compile(r"\\citebutton\{([^}]*)\}\{([^}]*)\}")
_YEAR_RE = re.compile(r"(19|20)\d{2}")
_CAP_WORD_RE = re.compile(r"[A-Z][A-Za-z]+")

# --------------------------------------------------------------------------- #
# 2. Helper functions                                                         #
# --------------------------------------------------------------------------- #

def _slug(text: str) -> str:
    """Convert arbitrary text into a safe ASCII slug (for BibTeX key)."""
    return re.sub(r"[^A-Za-z0-9]+", "_", text).strip("_") or "ref"


def _guess_author(label: str) -> str:
    """Try to infer an author surname from the label text."""
    # First, look for a capitalized word immediately *before* a year.
    for m in _YEAR_RE.finditer(label):
            pre = label[: m.start()].strip()
            if pre:
                    words = _CAP_WORD_RE.findall(pre)
                    if words:
                            return words[-1]
    # Fall back: first capitalized word anywhere.
    m = _CAP_WORD_RE.search(label)
    return m.group(0) if m else _slug(label)[:8]


def _make_key(author: str, year: str) -> str:
    """Return AUTHOR_YEAR if year present, else AUTHOR_XXXX."""
    if year:
        return f"{author}_{year}"
    return author

# --------------------------------------------------------------------------- #
# 3. Main runner                                                              #
# --------------------------------------------------------------------------- #

def create_bibtex(folder: str | os.PathLike[str], prefix: str = "slides", 
                  save_in_new_dir: bool = False) -> None:    
    """Extract citebutton references and write `references.bib`."""
    folder = pathlib.Path(folder).expanduser().resolve()
    if save_in_new_dir:
        auto_dir = folder / "auto"
    else:
        auto_dir = folder
    auto_dir.mkdir(exist_ok=True)

    bib_path = auto_dir / "references.bib"
    log_path = auto_dir / "references_build.log"

    logging.basicConfig(
            level=logging.INFO,
            format="%(levelname)s | %(message)s",
            handlers=[
                    logging.StreamHandler(),
                #     logging.FileHandler(log_path, encoding="utf-8", mode="w"),
            ],
    )
    logger = logging.getLogger("citebutton2bib")

    added_keys: Set[str] = set()
    output_lines: List[str] = []

    for tex_path in folder.glob(f"{prefix}*.tex"):
            if tex_path.is_dir():
                    continue
            logger.info("Scanning %s", tex_path.name)
            content = tex_path.read_text(encoding="utf-8")
            matches = list(_CITE_RE.finditer(content))
            if not matches:
                    logger.info("No citebuttons found in %s", tex_path.name)
                    continue

            # Strip prefix from filename for the comment banner
            display_name = (
                    tex_path.name[len(prefix):]
                    if prefix and tex_path.name.startswith(prefix)
                    else tex_path.name
            )

            file_block: List[str] = [f"% ================= {display_name} ================="]
            keys_in_file: List[str] = []

            for m in matches:
                    label, url = m.group(1).strip(), m.group(2).strip()
                    year_match = _YEAR_RE.search(label) or _YEAR_RE.search(url)
                    year_val = year_match.group(0) if year_match else ""
                    author_val = _guess_author(label)
                    key = _make_key(author_val, year_val)

                    if key in added_keys:
                            logger.info("SKIP duplicate key %s", key)
                            continue

                    original_cite = m.group(0)
                    entry = [
                            f"% {original_cite}",
                            f"@article{{{key},",
                            f"    author = {{{author_val}}},",
                            f"    title  = {{{label}}},",
                            f"    year   = {{{year_val}}},",
                            f"    url    = {{{url}}}",
                            "}",
                            "",  # blank line between entries
                    ]
                    file_block.extend(entry)
                    added_keys.add(key)
                    keys_in_file.append(key)

            if keys_in_file:
                    output_lines.extend(file_block)
                    logger.info(
                            "Added %d reference(s) from %s – keys: %s",
                            len(keys_in_file),
                            tex_path.name,
                            ", ".join(keys_in_file),
                    )

    if not output_lines:
            logger.info("No citebutton occurrences found in any file. Nothing to write.")
            return

    bib_path.write_text("\n".join(output_lines), encoding="utf-8")
    logger.info("Wrote %d unique reference(s) → %s", len(added_keys), bib_path.name)
    logger.info("Done. Full log at %s", log_path)


In [65]:
create_bibtex(FOLDER, PREFIX)

INFO | Scanning slides01-shapley-game-theory.tex


INFO | No citebuttons found in slides01-shapley-game-theory.tex
INFO | Scanning slides03-shap.tex
INFO | No citebuttons found in slides03-shap.tex
INFO | Scanning slides06-resources-and-software.tex
INFO | No citebuttons found in slides06-resources-and-software.tex
INFO | Scanning slides04-kernel-shap.tex
INFO | No citebuttons found in slides04-kernel-shap.tex
INFO | Scanning slides02-shapley-ml.tex
INFO | No citebuttons found in slides02-shapley-ml.tex
INFO | Scanning slides05-shap-global.tex
INFO | No citebuttons found in slides05-shap-global.tex
INFO | No citebutton occurrences found in any file. Nothing to write.


## Update .tex-s

In [76]:

#!/usr/bin/env python3
r"""
migrate_tex_with_furtherreading.py – update legacy Beamer .tex files.

For each top‑level .tex file whose basename starts with an optional *prefix*,
this script performs the following migrations in order:

1. **\documentclass** – convert the 11 pt / aspectratio=169 variant to the new
   10 pt style (commenting the old line).
2. **\usepackage → \input** – replace the legacy lmu‑lecture package include.
3. **Title block → \titlemeta{…}** – collapse the multi‑command header into the
   new macro.
4. **\citebutton{LABEL}{URL} → \furtherreading{KEY}** – replace every custom
   cite button with a reference to a BibTeX entry produced by the companion
   *extract_citebutton_to_bib.py* script.  The key is built with exactly the
   same heuristic so the two scripts remain in sync.

Each transformation comments the original code with a “% OLD” block and
introduces the replacement preceded by “% new”.  Untouched files are skipped
with a log entry.

Logs (console + file) live in the `auto/` subfolder.
"""

from __future__ import annotations

import logging
import os
import pathlib
import re
from typing import Callable, List, Tuple



# --------------------------------------------------------------------------- #
# 2. Compile all replacement patterns – each returns (new_text, changed)      #
# --------------------------------------------------------------------------- #

# (a) \documentclass
_docclass_re = re.compile(
    r"^(\\documentclass\[\s*)11pt,(compress,t,notes=noshow,\s*aspectratio=169,\s*xcolor=table\](\s*\{beamer\}))",
    re.M,
)

def _fix_docclass(text: str, with_comments: bool = True) -> Tuple[str, bool]:
    def _sub(m: re.Match[str]) -> str:
        old_line = m.group(0)
        new_line = r"\documentclass[10pt,compress,t,notes=noshow, xcolor=table]{beamer}"
        if with_comments:
            return f"% OLD\n%{old_line}\n% NEW\n{new_line}"
        else:
            return new_line

    new, n = _docclass_re.subn(_sub, text)
    return new, n > 0


# (b) \usepackage → \input
_usepkg_re = re.compile(r"^(\\)usepackage\{(\.\./\.\./style/)lmu-lecture\}", re.M)
def _fix_usepackage(text: str, with_comments: bool = True) -> Tuple[str, bool]:
    def _sub(m: re.Match[str]) -> str:
        old_line = m.group(0)
        new_line = (
            rf"{m.group(1)}input{{{m.group(2)}preamble}}" "\n"
            r"\input{../../latex-math/basic-math}" "\n"
            r"\input{../../latex-math/basic-ml}"
        )

        if with_comments:
            return f"% OLD\n%{old_line}\n% NEW\n{new_line}"
        else:
            return new_line

    new, n = _usepkg_re.subn(_sub, text)
    return new, n > 0


# (c) title/goal/chapter/lecture block → \titlemeta{...}
_block_re = re.compile(
    r"""
    \\newcommand\{\\titlefigure\}\{(?P<path>[^}]*)\}\s*
    \\newcommand\{\\learninggoals\}\{(?P<goals>.*?)\}\s*
    \\lecturechapter\{(?P<title>[^}]*)\}\s*
    \\lecture\{(?P<subtitle>[^}]*)\}
    """,
    re.S | re.X,
)

# ToDo: make subtitle an argument, not the main title
def _fix_block(text: str, title: str, with_comments: bool = True) -> Tuple[str, bool]:
    def _sub(m: re.Match[str]) -> str:
        old = m.group(0)
        new_block = (
            f"\\titlemeta{{\n{title}\n}}{{\n{m.group('title')}\n}}{{\n"
            f"{m.group('path')}\n}}{{\n{m.group('goals')}\n}}\n"
        )
        if with_comments:
            commented_old = "% OLD\n%" + old.replace("\n", "\n%")
            return f"{commented_old}\n% NEW\n{new_block}"
        else:
            return new_block

    new, n = _block_re.subn(_sub, text)
    return new, n > 0


# (d) \citebutton → \furtherreading


def _fix_citebutton(text: str, with_comments: bool = True) -> Tuple[str, bool]:
    changed = False

    def _sub(m: re.Match[str]) -> str:
        nonlocal changed
        label, url = m.group(1).strip(), m.group(2).strip()
        year_match = _YEAR_RE.search(label) or _YEAR_RE.search(url)
        year_val = year_match.group(0) if year_match else ""
        author_val = _guess_author(label)
        key = _make_key(author_val, year_val)
        old_cmd = m.group(0)
        new_cmd = rf"\furtherreading{{{key}}}"
        changed = True
        
        if with_comments:
            return f"% OLD\n%{old_cmd}\n% NEW\n{new_cmd}"
        else:
            return new_cmd
        
    new_text = _CITE_RE.sub(_sub, text)
    return new_text, changed


_TRANSFORMS: List[Callable[[str], Tuple[str, bool]]] = [
    _fix_docclass,
    _fix_usepackage,
    _fix_block,
    _fix_citebutton,
]

# --------------------------------------------------------------------------- #
# 3. Main runner                                                              #
# --------------------------------------------------------------------------- #

def run(folder: str | os.PathLike[str], 
    prefix: str = "slides",
    with_comment: bool = True,
    in_place: bool = False) -> None:
    """
    Convert all matching .tex files in *folder*.
    If in_place is False, create `<folder>/auto` and write changed files there.
    If in_place is True, backup originals to `<folder>/before_migration` and overwrite originals.
    """
    folder = pathlib.Path(folder).expanduser().resolve()
    if in_place:
        backup_dir = folder / "before_migration"
        backup_dir.mkdir(exist_ok=True)
        log_file = folder / "migration_inplace.log"
    else:
        auto_dir = folder / "auto"
        auto_dir.mkdir(exist_ok=True)
        log_file = auto_dir / "migration.log"

    logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)s | %(message)s",
    handlers=[
        logging.StreamHandler(),
        # logging.FileHandler(log_file, encoding="utf-8", mode="w"),
    ],
    )
    logger = logging.getLogger("migrate_tex")

    logger.info("Starting migration in %s (prefix '%s')", folder, prefix)

    for tex_path in folder.glob(f"{prefix}*.tex"):
        if tex_path.is_dir():
            continue
        with tex_path.open(encoding="utf-8") as fh:
            content = fh.read()

        changed = False
        for fn in _TRANSFORMS:
            if fn is _fix_block:
                content, was_changed = fn(content, title=TITLE_MAIN, with_comments=with_comment)
            else:
                content, was_changed = fn(content, with_comments=with_comment)
            changed |= was_changed

        if not changed:
            logger.info("SKIP %s – no patterns found", tex_path.name)
            continue

        if in_place:
            # Backup original
            backup_path = backup_dir / tex_path.name
            if not backup_path.exists():
                tex_path.replace(backup_path)
                # Write modified content in place
                tex_path.write_text(content, encoding="utf-8")
                logger.info("UPDATED %s (backup in before_migration)", tex_path.name)
        else:
            new_name = f"{tex_path.stem}_auto.tex"
            out_path = auto_dir / new_name
            out_path.write_text(content, encoding="utf-8")
            logger.info("WROTE %s", new_name)

    logger.info("Migration finished. See %s for details.", log_file)





## Run

In [77]:
run(FOLDER, PREFIX, with_comment=INCLUDE_COMMENTS, in_place=True)

INFO | Starting migration in \\wsl.localhost\Ubuntu\root\lecture_service_attempt\slides\06_shapley (prefix 'slides')


INFO | UPDATED slides01-shapley-game-theory.tex (backup in before_migration)
INFO | UPDATED slides03-shap.tex (backup in before_migration)
INFO | UPDATED slides06-resources-and-software.tex (backup in before_migration)
INFO | UPDATED slides04-kernel-shap.tex (backup in before_migration)
INFO | UPDATED slides02-shapley-ml.tex (backup in before_migration)
INFO | UPDATED slides05-shap-global.tex (backup in before_migration)
INFO | Migration finished. See \\wsl.localhost\Ubuntu\root\lecture_service_attempt\slides\06_shapley\migration_inplace.log for details.


# Rename pdfs

In [2]:
from pathlib import Path

folder_pdfs = Path("pdfs", "06_shapley")

assert folder_pdfs.exists(), f"Folder {folder_pdfs} does not exist. Folders:\n {list(Path('pdfs').glob('*'))}"

folder_num = folder_pdfs.parts[-1].split("_")[0]


# num files
pdfs = list(folder_pdfs.glob("*.pdf"))

num_files = len(pdfs)
print(f"Found {num_files} PDF files in {folder_pdfs}.")

# sort pdfs
pdfs.sort(key=lambda p: p.stem.lower())

print(pdfs)

Found 12 PDF files in pdfs\06_shapley.
[WindowsPath('pdfs/06_shapley/06_01_before.pdf'), WindowsPath('pdfs/06_shapley/06_02_before.pdf'), WindowsPath('pdfs/06_shapley/06_03_before.pdf'), WindowsPath('pdfs/06_shapley/06_04_before.pdf'), WindowsPath('pdfs/06_shapley/06_05_before.pdf'), WindowsPath('pdfs/06_shapley/06_06_before.pdf'), WindowsPath('pdfs/06_shapley/lecture_service_attempt (1).pdf'), WindowsPath('pdfs/06_shapley/lecture_service_attempt (2).pdf'), WindowsPath('pdfs/06_shapley/lecture_service_attempt (3).pdf'), WindowsPath('pdfs/06_shapley/lecture_service_attempt (4).pdf'), WindowsPath('pdfs/06_shapley/lecture_service_attempt (5).pdf'), WindowsPath('pdfs/06_shapley/lecture_service_attempt (6).pdf')]


In [47]:
# rename to "{folder_num}_{index}_before.pdf"
for i, pdf in enumerate(pdfs, start=1):
    new_name = folder_pdfs / f"{folder_num}_{i:02d}_before.pdf"
    pdf.rename(new_name)
    print(f"Renamed {pdf} to {new_name}")


Renamed pdfs\06_shapley\lecture_iml (27).pdf to pdfs\06_shapley\06_01_before.pdf
Renamed pdfs\06_shapley\lecture_iml (28).pdf to pdfs\06_shapley\06_02_before.pdf
Renamed pdfs\06_shapley\lecture_iml (29).pdf to pdfs\06_shapley\06_03_before.pdf
Renamed pdfs\06_shapley\lecture_iml (30).pdf to pdfs\06_shapley\06_04_before.pdf
Renamed pdfs\06_shapley\lecture_iml (31).pdf to pdfs\06_shapley\06_05_before.pdf
Renamed pdfs\06_shapley\lecture_iml (32).pdf to pdfs\06_shapley\06_06_before.pdf


In [3]:
files_not_before = [pdf for pdf in pdfs if not pdf.stem.endswith("_before")]

for i, pdf in enumerate(files_not_before, start=1):
    new_name = folder_pdfs / f"{folder_num}_{i:02d}_after.pdf"
    pdf.rename(new_name)
    print(f"Renamed {pdf} to {new_name}")

Renamed pdfs\06_shapley\lecture_service_attempt (1).pdf to pdfs\06_shapley\06_01_after.pdf
Renamed pdfs\06_shapley\lecture_service_attempt (2).pdf to pdfs\06_shapley\06_02_after.pdf
Renamed pdfs\06_shapley\lecture_service_attempt (3).pdf to pdfs\06_shapley\06_03_after.pdf
Renamed pdfs\06_shapley\lecture_service_attempt (4).pdf to pdfs\06_shapley\06_04_after.pdf
Renamed pdfs\06_shapley\lecture_service_attempt (5).pdf to pdfs\06_shapley\06_05_after.pdf
Renamed pdfs\06_shapley\lecture_service_attempt (6).pdf to pdfs\06_shapley\06_06_after.pdf


# Count files

In [2]:
#!/usr/bin/env python3
"""
Count files in every sub‑directory under a given root and report the total.

Usage (from the command line):
    python count_files.py /path/to/root
"""

from pathlib import Path
import os
import sys
from pprint import pprint


def count_files_by_directory(root: str | os.PathLike = ".") -> tuple[dict[Path, int], int]:
    """
    Recursively count files inside every directory under *root*.

    Parameters
    ----------
    root : str | Path
        Directory to start walking from.

    Returns
    -------
    dir_counts : dict[Path, int]
        Mapping of absolute directory path → number of (non‑hidden) files it contains.
    total_files : int
        Sum of all counted files.
    """
    dir_counts: dict[Path, int] = {}
    total_files = 0
    root_path = Path(root).expanduser().resolve()

    for dirpath, _, filenames in os.walk(root_path):
        # Skip hidden files (starting with "."); remove this filter if you want to include them
        visible_files = [f for f in filenames if not f.startswith(".")]
        count = len(visible_files)

        dir_counts[Path(dirpath)] = count
        total_files += count

    return dir_counts, total_files


counts, total = count_files_by_directory()

pprint(counts)            # Nicely formats the per‑directory counts
print(f"\nTotal files: {total}")


{WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt'): 5,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git'): 7,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/branches'): 0,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/hooks'): 14,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/info'): 1,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/logs'): 1,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/logs/refs'): 0,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/logs/refs/heads'): 1,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/logs/refs/remotes'): 0,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/logs/refs/remotes/origin'): 1,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/.git/objects'): 0,
 WindowsPath('//wsl.localhost/Ubuntu/root/lecture_servi

In [3]:
#!/usr/bin/env python3
"""
Utility helpers:

- count_files_by_directory(root, include=None, exclude=None)
      → (dict[pathlib.Path, int], int)

- zip_tree(root, zip_path, include=None, exclude=None, *, keep_structure=True)
      → pathlib.Path  # the ZIP file that was written
"""

from pathlib import Path
import os
import zipfile
from typing import Iterable, Sequence


def _normalize_list(
    items: Sequence[str | os.PathLike] | None, root: Path
) -> set[Path] | None:
    """Return absolute Path objects or None if *items* is None/empty."""
    if not items:
        return None
    return {root.joinpath(Path(p).as_posix()).resolve() for p in items}


def _should_skip(path: Path, include: set[Path] | None, exclude: set[Path] | None) -> bool:
    """True if *path* (a directory) must be skipped according to include/exclude."""
    if include is not None:
        # Skip everything NOT under one of the include paths
        return not any(include_path in path.parents or include_path == path for include_path in include)
    if exclude is not None:
        # Skip anything that is (or is inside) an excluded dir
        return any(ex_path in path.parents or ex_path == path for ex_path in exclude)
    return False


def count_files_by_directory(
    root: str | os.PathLike = ".",
    *,
    include: Sequence[str | os.PathLike] | None = None,
    exclude: Sequence[str | os.PathLike] | None = None,
) -> tuple[dict[Path, int], int]:
    """
    Recursively count visible files in *root*, honoring optional include/exclude.

    include / exclude:
        Iterable of directory paths relative to *root*.
        • If *include* is given, ONLY those paths (and their sub‑trees) are considered.
        • If *include* is None but *exclude* is provided, those paths are skipped.
        • Leading dots (hidden dirs/files) are always ignored.
    """
    root = Path(root).expanduser().resolve()
    include_set = _normalize_list(include, root)
    exclude_set = _normalize_list(exclude, root)

    dir_counts: dict[Path, int] = {}
    total = 0

    for dirpath, _, filenames in os.walk(root):
        dir_path = Path(dirpath)

        # Determine if this directory should be processed
        if _should_skip(dir_path, include_set, exclude_set):
            continue

        # Skip hidden directories entirely
        if dir_path.name.startswith("."):
            continue

        visible_files = [f for f in filenames if not f.startswith(".")]
        count = len(visible_files)
        dir_counts[dir_path] = count
        total += count

    return dir_counts, total


def zip_tree(
    root: str | os.PathLike,
    zip_path: str | os.PathLike,
    include: Sequence[str | os.PathLike] | None = None,
    exclude: Sequence[str | os.PathLike] | None = None,
    *,
    keep_structure: bool = True,
) -> Path:
    """
    Compress *root* into *zip_path* applying the same include/exclude logic.

    Parameters
    ----------
    keep_structure : bool
        • True  → store each file with its path relative to *root* (default).  
        • False → store only basenames (may cause name clashes).

    Returns  -------
    Path to the created ZIP archive.
    """
    root = Path(root).expanduser().resolve()
    zip_path = Path(zip_path).expanduser().resolve()
    include_set = _normalize_list(include, root)
    exclude_set = _normalize_list(exclude, root)

    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for dirpath, _, filenames in os.walk(root):
            dir_path = Path(dirpath)

            if _should_skip(dir_path, include_set, exclude_set):
                continue
            if dir_path.name.startswith("."):
                continue

            for fname in filenames:
                if fname.startswith("."):
                    continue
                fpath = dir_path / fname
                arcname = fpath.relative_to(root) if keep_structure else fpath.name
                zf.write(fpath, arcname)

    return zip_path


# -------- example usage --------
if __name__ == "__main__":
    project_root = Path(".")  # current directory

    # Only include specific sub‑folders (relative to root)
    include_dirs = ["src", "tests"]

    # counts, total = count_files_by_directory(project_root, include=include_dirs)
    # print("Per‑directory counts:")
    # for d, c in counts.items():
    #     print(f"{d}: {c}")
    # print(f"TOTAL: {total}")

    # Create archive, excluding virtual‑env and build artifacts
    archive = zip_tree(
        project_root,
        "to_overleaf.zip",
        include=["latex-math", "slides", "style"],
    )
    print(f"\nCreated ZIP archive at {archive}")



Created ZIP archive at \\wsl.localhost\Ubuntu\root\lecture_service_attempt\to_overleaf.zip


In [4]:
len(os.listdir("style"))

25

In [5]:
count_files_by_directory("style")

({WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/style'): 23,
  WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/style/color'): 3,
  WindowsPath('//wsl.localhost/Ubuntu/root/lecture_service_attempt/style/logos'): 24},
 50)

# hanging words

In [7]:
!uv pip install pdfplumber

[2mUsing Python 3.10.18 environment at: C:\Users\hayk_\.conda\envs\lectures[0m
[2mResolved [1m8 packages[0m [2min 777ms[0m[0m
[36m[1mDownloading[0m[39m pdfminer-six [2m(5.4MiB)[0m
[36m[1mDownloading[0m[39m cryptography [2m(3.2MiB)[0m
[36m[1mDownloading[0m[39m pypdfium2 [2m(2.8MiB)[0m
 [32m[1mDownloading[0m[39m pypdfium2
 [32m[1mDownloading[0m[39m cryptography
 [32m[1mDownloading[0m[39m pdfminer-six
[2mPrepared [1m6 packages[0m [2min 1.04s[0m[0m
[2mInstalled [1m6 packages[0m [2min 682ms[0m[0m
 [32m+[39m [1mcffi[0m[2m==1.17.1[0m
 [32m+[39m [1mcryptography[0m[2m==45.0.5[0m
 [32m+[39m [1mpdfminer-six[0m[2m==20250506[0m
 [32m+[39m [1mpdfplumber[0m[2m==0.11.7[0m
 [32m+[39m [1mpycparser[0m[2m==2.22[0m
 [32m+[39m [1mpypdfium2[0m[2m==4.30.1[0m


In [None]:
import pdfplumber
from pathlib import Path

pdf_path = Path("pdfs") / "02_interp_models_1" / "02_02_lm_simple_after.pdf" 
assert pdf_path.exists(), f"PDF file not found: {pdf_path}"
results = []

with pdfplumber.open(pdf_path) as pdf:
    for page_num, page in enumerate(pdf.pages, start=1):
        # Extract lines with positions
        for line in page.extract_text(x_tolerance=2, y_tolerance=2).split("\n"):
            # Very naïve header/footer filter:
            if page.height * 0.05 < page.extract_words()[0]['top'] < page.height * 0.95:
                words = line.strip().split()
                print(f"Page {page_num}: {line}")
                if len(words) == 1:
                    bbox = [w for w in page.extract_words() if w['text'] == words[0]][0]
                    results.append({
                        "page": page_num,
                        "text": words[0],
                        "bbox": (bbox['x0'], bbox['top'], bbox['x1'], bbox['bottom'])
                    })

# Show summary
for r in results:
    print(f"Page {r['page']}: “{r['text']}” at {r['bbox']}")


Page 1: Interpretable Machine Learning
Page 1: Linear Regression Model
Page 1: 400
Page 1: 300
Page 1: 200
Page 1: 100
Page 1: 0
Page 1: −100
Page 1: 0.0 2.5 5.0 7.5 10.0
Page 1: x
Page 1: y
Page 1: Learning goals
Page 1: LM basics and assumptions
Page 1: Interpretation of main effects in LM
Page 1: What are significant features?
Page 1: “400” at (30.7588364, 149.53615468000024, 36.12052232, 152.75059468000023)
Page 1: “300” at (30.7588364, 161.34743588000023, 36.12052232, 164.56187588000023)
Page 1: “200” at (30.7588364, 173.15514548000021, 36.12052232, 176.36958548000024)
Page 1: “100” at (30.7588364, 184.96642668000024, 36.12052232, 188.18086668000024)
Page 1: “0” at (34.334008, 196.77413628000022, 36.12123664, 199.98857628000025)
Page 1: “−100” at (28.8837464, 208.58541748000025, 36.12266528, 211.79985748000024)
Page 1: “x” at (92.40108079999999, 216.27292932000023, 94.3654608, 220.20168932000024)
Page 1: “y” at (23.92529412, 177.93287480000023, 27.85405412, 179.89725480000024)
