# In [None]:

#!/usr/bin/env python3
"""
Make_Requirements.py — derive requirements.txt from project notebooks.

It scans for notebooks named like:
  - GirishK_PwrCst_Wk1_Section1-Business*.ipynb    (requested pattern)
  - GirishK_PwrCst_Wk1_Section*-Business*.ipynb    (convenience to capture Sections 1–N)

It extracts `import x` and `from x import y` statements from code cells, maps
module names to pip package names, and writes requirements.txt in the project
base directory.

Usage (CLI):
  python Make_Requirements.py
  python Make_Requirements.py --pinned                 # pin versions if available
  python Make_Requirements.py --include-notebooks=no   # skip adding jupyter deps
  python Make_Requirements.py --patterns "GirishK_PwrCst_Wk1_Section1-Business*.ipynb,more*.ipynb"
  python Make_Requirements.py --out requirements.txt

Usage (Notebook):
  %run Make_Requirements.py --pinned
  # or programmatic
  from Make_Requirements import derive_requirements
  derive_requirements(pinned=True)

Notes:
- Unknown args (like `-f kernel.json`) are tolerated and ignored.
- Standard library modules are filtered out.
"""

from __future__ import annotations
import argparse
import sys
import json
import re
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path

# -------- Project root detection --------
def find_project_root(start: Path) -> Path:
    """Locate the project root by walking upward from *start*.

    A directory qualifies as the root when it contains an entry named
    ``.git``, ``results`` or ``data``. At most 12 directories are inspected
    (the resolved *start* plus up to 11 of its ancestors, nearest first).

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first qualifying directory, or the resolved *start* when none of
        the inspected directories qualifies.
    """
    markers = (".git", "results", "data")
    origin = start.resolve()
    # The resolved start followed by its ancestors, capped at 12 levels.
    for candidate in (origin, *origin.parents)[:12]:
        if any((candidate / marker).exists() for marker in markers):
            return candidate
    return origin

# -------- Import parsing --------
# One entry of an ``import`` statement: a dotted module path of any depth,
# optionally followed by ``as <alias>`` on the same line.
_IMPORT_ITEM = r'[A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*(?:[ \t]+as[ \t]+[A-Za-z_]\w*)?'

# Group 1 is the full comma-separated list that follows ``import``, aliases
# included (e.g. "numpy as np, os.path"). Capturing the aliases matters: if the
# match stopped at the first " as ", every module after it would be lost.
IMPORT_RE = re.compile(
    r'^\s*import\s+(' + _IMPORT_ITEM + r'(?:\s*,\s*' + _IMPORT_ITEM + r')*)',
    re.M,
)

# Group 1 is the dotted module path of ``from <module> import ...``. Relative
# imports (``from . import x``, ``from .pkg import y``) deliberately do not
# match because the path must start with an identifier character.
FROM_RE = re.compile(r'^\s*from\s+([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*)\s+import\s+', re.M)

# Common module->pip renames
# Import names whose pip package is published under a different name.
_RENAMED_FOR_PIP = {
    "sklearn": "scikit-learn",
    "cv2": "opencv-python",
    "PIL": "Pillow",
    "bs4": "beautifulsoup4",
    "yaml": "PyYAML",
    "jinja2": "Jinja2",
}

# Common libraries whose import name and pip package name are identical.
_SAME_NAME_FOR_PIP = (
    "numpy",
    "pandas",
    "matplotlib",
    "seaborn",
    "scipy",
    "statsmodels",
    "xgboost",
    "lightgbm",
    "catboost",
    "plotly",
    "altair",
    "requests",
    "tqdm",
    "numba",
    "torch",
    "tensorflow",
)

# Lookup table: top-level import name -> pip package name.
MODULE_TO_PIP = {
    **_RENAMED_FOR_PIP,
    **{name: name for name in _SAME_NAME_FOR_PIP},
}

# Jupyter packages added to the output when notebook dependencies are requested.
NOTEBOOK_DEPS = ["jupyterlab", "ipykernel"]

def top_level(mod: str) -> str:
    """Return the first dot-separated component of *mod* (``"a.b.c"`` -> ``"a"``)."""
    head, _, _ = mod.partition(".")
    return head

def parse_imports_from_source(src: str) -> set[str]:
    """Collect the top-level module names imported by a piece of source text.

    Both ``import ...`` statements (matched by ``IMPORT_RE``) and
    ``from ... import ...`` statements (matched by ``FROM_RE``) are considered.

    Args:
        src: Source text to scan.

    Returns:
        The set of top-level module names found; empty when nothing matches.
    """
    alias_suffix = r"\s+as\s+\w+$"
    names = set()
    for match in IMPORT_RE.finditer(src):
        # The captured group is a comma-separated list of module paths.
        for entry in re.split(r"\s*,\s*", match.group(1)):
            # Drop a trailing " as alias" if the entry carries one.
            bare = re.sub(alias_suffix, "", entry.strip())
            if bare:
                names.add(top_level(bare))
    names.update(top_level(match.group(1)) for match in FROM_RE.finditer(src))
    return names

def parse_notebook(path: Path) -> set[str]:
    """Return the top-level modules imported by the code cells of a notebook.

    The function is best-effort: a notebook that cannot be read or decoded is
    skipped with a warning on stderr rather than aborting the whole scan, and
    structurally unexpected content (non-object top level, missing or
    non-list ``cells``, malformed cells) is ignored.

    Args:
        path: Location of the ``.ipynb`` file (JSON, read as UTF-8).

    Returns:
        The set of top-level module names found in code cells; empty when the
        file is unreadable, malformed, or contains no imports.
    """
    try:
        nb = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Warning: skipping unreadable notebook {path}: {exc}", file=sys.stderr)
        return set()

    # Valid JSON is not necessarily a notebook: guard the expected shape.
    if not isinstance(nb, dict):
        return set()
    cells = nb.get("cells")
    if not isinstance(cells, list):
        return set()

    found: set[str] = set()
    for cell in cells:
        if not isinstance(cell, dict) or cell.get("cell_type") != "code":
            continue
        source = cell.get("source", "")
        # Cell source may be stored as one string or as a list of line strings.
        if isinstance(source, (list, tuple)):
            source = "".join(part for part in source if isinstance(part, str))
        if not isinstance(source, str):
            continue
        found |= parse_imports_from_source(source)
    return found

# -------- Resolution & writing --------
def stdlib_names() -> set[str]:
    """Return the names of modules treated as part of the standard library.

    The result is the union of three sources:

    * ``sys.stdlib_module_names`` when the interpreter provides it
      (Python 3.10+);
    * ``sys.builtin_module_names``;
    * a fixed fallback list of long-standing standard-library modules, so
      that older interpreters still filter common modules such as
      ``subprocess`` or ``__future__`` instead of reporting them as pip
      packages.

    Returns:
        A new set of module names; callers may mutate it freely.
    """
    names = set(getattr(sys, "stdlib_module_names", ()))
    names.update(sys.builtin_module_names)
    # Fallback for interpreters without sys.stdlib_module_names.
    names |= {
        "__future__", "abc", "argparse", "array", "ast", "asyncio", "atexit",
        "base64", "bisect", "builtins", "bz2", "cProfile", "calendar",
        "codecs", "collections", "concurrent", "configparser", "contextlib",
        "copy", "copyreg", "csv", "ctypes", "dataclasses", "datetime",
        "decimal", "difflib", "doctest", "email", "enum", "errno",
        "faulthandler", "fnmatch", "fractions", "functools", "gc", "getpass",
        "glob", "gzip", "hashlib", "heapq", "hmac", "html", "http",
        "importlib", "inspect", "io", "itertools", "json", "locale",
        "logging", "lzma", "math", "multiprocessing", "numbers", "operator",
        "os", "pathlib", "pdb", "pickle", "platform", "pprint", "profile",
        "queue", "random", "re", "sched", "secrets", "select", "selectors",
        "shlex", "shutil", "signal", "smtplib", "socket", "sqlite3", "ssl",
        "stat", "statistics", "string", "struct", "subprocess", "sys",
        "sysconfig", "tarfile", "tempfile", "textwrap", "threading", "time",
        "timeit", "tkinter", "traceback", "types", "typing", "unicodedata",
        "unittest", "urllib", "uuid", "venv", "warnings", "weakref", "xml",
        "zipfile", "zlib", "zoneinfo",
    }
    return names

def module_to_pip(mod: str) -> str | None:
    """Translate a top-level import name into a pip package name.

    Lookup order: an explicit entry in ``MODULE_TO_PIP`` wins; otherwise a
    module reported by ``stdlib_names()`` yields ``None``; anything else is
    assumed to be published on pip under its import name.

    Args:
        mod: Top-level module name as it appears in an import statement.

    Returns:
        The pip package name, or ``None`` for standard-library modules.
    """
    try:
        return MODULE_TO_PIP[mod]
    except KeyError:
        pass
    # Not in the table: filter out the standard library, else keep the name.
    return None if mod in stdlib_names() else mod

def get_version(pkg: str) -> str | None:
    """Return the installed version of the pip package *pkg*, if discoverable.

    The version is looked up from the installed distribution metadata first.
    That lookup is keyed by the pip package name, so it also works for
    packages whose import name differs (``opencv-python``, ``Pillow``,
    ``beautifulsoup4``, ``PyYAML``) and does not require importing the
    package. If no metadata is found, the function falls back to importing
    the module and reading its ``__version__`` attribute.

    Args:
        pkg: pip package name (e.g. ``"scikit-learn"``).

    Returns:
        The version string, or ``None`` when it cannot be determined. The
        lookup is best-effort and never raises.
    """
    try:
        from importlib import metadata  # stdlib since Python 3.8

        dist_version = metadata.version(pkg)
    except Exception:
        # Deliberately broad: a missing distribution, a missing
        # importlib.metadata, or broken metadata all mean "unknown here".
        dist_version = None
    if dist_version:
        return str(dist_version)

    # Fallback for packages that are importable but have no metadata.
    module_name = "sklearn" if pkg == "scikit-learn" else pkg
    try:
        version = getattr(__import__(module_name), "__version__", None)
    except Exception:
        # Importing third-party code can fail in arbitrary ways; stay best-effort.
        return None
    return None if version is None else str(version)

def derive_requirements(
    pinned: bool = False,
    include_notebooks: bool = True,
    patterns: list[str] | None = None,
    out: str | Path = "requirements.txt",
    start_dir: Path | None = None,
) -> Path:
    """Scan project notebooks for imports and write a requirements file.

    The project root is located from *start_dir*, every notebook below it
    that matches one of *patterns* is parsed, and the imported modules are
    mapped to pip package names. Standard-library modules and modules that
    live in the project itself are left out; any other module without a known
    mapping is assumed to be a pip package of the same name.

    Args:
        pinned: When true, emit ``name==version`` for every package whose
            installed version can be determined; others stay unpinned.
        include_notebooks: When true, also list the packages in
            ``NOTEBOOK_DEPS``.
        patterns: Glob patterns for notebook file names, searched
            recursively under the project root. Defaults to the two
            ``GirishK_PwrCst_Wk1_Section*`` patterns when empty or ``None``.
        out: Output path, interpreted relative to the project root.
        start_dir: Directory from which the project root is searched;
            defaults to the current working directory.

    Returns:
        The resolved path of the file that was written.
    """
    patterns = patterns or [
        "GirishK_PwrCst_Wk1_Section1-Business*.ipynb",
        "GirishK_PwrCst_Wk1_Section*-Business*.ipynb",
    ]
    root = find_project_root(start_dir or Path.cwd())

    # De-duplicate (patterns may overlap) and skip Jupyter autosave copies,
    # which match the same globs but may contain stale imports.
    nb_paths = sorted({
        nbp
        for pat in patterns
        for nbp in root.rglob(pat)
        if ".ipynb_checkpoints" not in nbp.parts
    })

    imports: set[str] = set()
    for nbp in nb_paths:
        imports |= parse_notebook(nbp)

    # Directories where a notebook import may resolve to project code.
    search_dirs = [root, *sorted({nbp.parent for nbp in nb_paths} - {root})]

    # Map to pip names; drop project-local and standard-library modules.
    pkgs: set[str] = set()
    for mod in sorted(imports):
        if _is_local_module(mod, search_dirs):
            continue
        pip_name = module_to_pip(mod)
        if pip_name:
            pkgs.add(pip_name)

    if include_notebooks:
        pkgs |= set(NOTEBOOK_DEPS)

    lines = ["# Auto-derived by Make_Requirements.py",
             f"# Generated: {datetime.now():%Y-%m-%d %H:%M:%S}",
             f"# Scanned patterns: {', '.join(patterns)}",
             ""]
    for pkg in sorted(pkgs):
        ver = get_version(pkg) if pinned else None
        # Fall back to the bare name when unpinned or the version is unknown.
        lines.append(f"{pkg}=={ver}" if ver else pkg)

    out_path = (root / out).resolve()
    # Allow outputs such as "build/requirements.txt" in a fresh checkout.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text("\n".join(lines).strip() + "\n", encoding="utf-8")
    return out_path


def _is_local_module(mod: str, search_dirs: list[Path]) -> bool:
    """Return True if *mod* is a ``.py`` file or package in one of *search_dirs*."""
    return any(
        (folder / f"{mod}.py").is_file() or (folder / mod / "__init__.py").is_file()
        for folder in search_dirs
    )

def main(argv: list[str] | None = None):
    """Command-line entry point: parse options and write the requirements file.

    Unrecognised arguments are accepted and ignored, so extra arguments
    injected by a notebook kernel do not cause a failure.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(add_help=True, allow_abbrev=False)
    parser.add_argument(
        "--pinned",
        action="store_true",
        help="Pin versions if importable",
    )
    parser.add_argument(
        "--include-notebooks",
        choices=["yes", "no"],
        default="yes",
        help="Include jupyter deps (jupyterlab, ipykernel) [default: yes]",
    )
    parser.add_argument(
        "--patterns",
        type=str,
        default=(
            "GirishK_PwrCst_Wk1_Section1-Business*.ipynb,"
            "GirishK_PwrCst_Wk1_Section*-Business*.ipynb"
        ),
        help="Comma-separated glob patterns to scan",
    )
    parser.add_argument(
        "--out",
        type=str,
        default="requirements.txt",
        help="Output filename relative to project root",
    )

    # parse_known_args keeps going when it meets options it does not know.
    opts, _ignored = parser.parse_known_args(argv)

    # Split the comma-separated list, trimming blanks and dropping empties.
    wanted_patterns = list(filter(None, map(str.strip, opts.patterns.split(","))))
    with_notebook_deps = opts.include_notebooks.lower() == "yes"

    written = derive_requirements(
        pinned=opts.pinned,
        include_notebooks=with_notebook_deps,
        patterns=wanted_patterns,
        out=opts.out,
        start_dir=Path.cwd(),
    )
    print(f"Wrote {written}")

# Run only when executed as a script (not when imported): pass the
# command-line arguments, without the program name, to main().
if __name__ == "__main__":
    main(sys.argv[1:])
