In [None]:
import os
import json
import tokenize
from typing import Dict, Optional, Tuple, List
from pathlib import Path
from dataclasses import dataclass
import pandas as pd

# Reaching this line means every import above resolved in the current kernel.
print("Imports OK")


In [None]:
# --- Scan settings: adjust these before running the cells below ---

# Project root that will be searched recursively.
ROOT_DIR = Path("/path/to/your/python/project")   # <-- change this

# Directory names that are never descended into (extend as needed).
EXCLUDE_DIRS = {
    # version-control metadata
    ".git", ".hg", ".svn",
    # caches
    "__pycache__", ".mypy_cache", ".pytest_cache",
    # environments / vendored dependencies
    "node_modules", ".venv", "venv", ".tox",
    # build artefacts
    "build", "dist",
}

# Filename suffixes that mark a file as Python source.
EXTENSIONS = (".py", ".pyw")

# Destination for the CSV/JSON reports; created right away if it is missing.
OUTPUT_DIR = Path("./syntax_scan_outputs")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

print("Configured. ROOT_DIR =", ROOT_DIR)


In [None]:
@dataclass
class ErrorRecord:
    """A single problem found while checking one source file.

    Built by ``compile_file``: either a ``SyntaxError`` raised while reading or
    compiling the file, or any other exception raised during the same attempt.
    """
    file: str               # path of the checked file, as a string
    lineno: Optional[int]   # line number taken from the SyntaxError; None for non-syntax records
    offset: Optional[int]   # column offset taken from the SyntaxError; None for non-syntax records
    msg: str                # error message; non-syntax records prefix it with the exception class name
    text: str               # offending source line without its trailing newline; "" when unavailable
    caret: str              # padding followed by "^" marking `offset` within `text`; "" when unavailable
    category: str           # 'syntax' or 'non-syntax'
    error_type: str         # exception class name

def iter_python_files(root: Path, exts: Tuple[str, ...], exclude_dirs: set) -> List[Path]:
    """Collect every file under ``root`` whose name ends with one of ``exts``.

    Args:
        root: Directory to walk recursively.
        exts: Filename suffixes to keep, e.g. ``(".py", ".pyw")``. A single
            string or any other iterable of strings is accepted as well.
        exclude_dirs: Directory *names* (not paths) to prune wherever they
            occur in the tree; their contents are never visited.

    Returns:
        Matching paths, each built as ``Path(dirpath) / name``. Directories and
        files are visited in sorted name order, so the result does not depend
        on the order in which the operating system lists directory entries.
    """
    # str.endswith accepts only a str or a tuple of str; normalise so that a
    # list or set of suffixes does not raise TypeError. A bare string is wrapped
    # rather than exploded into single characters.
    suffixes = (exts,) if isinstance(exts, str) else tuple(exts)

    files: List[Path] = []
    for dirpath, dirnames, filenames in os.walk(root):
        # In-place assignment is what makes os.walk skip the pruned directories;
        # sorting here also fixes the order in which subdirectories are entered.
        dirnames[:] = sorted(d for d in dirnames if d not in exclude_dirs)
        files.extend(
            Path(dirpath) / name
            for name in sorted(filenames)
            if name.endswith(suffixes)
        )
    return files

def compile_file(path: Path) -> Optional[ErrorRecord]:
    """Compile one file (without executing it) and report any failure.

    Args:
        path: File to check.

    Returns:
        ``None`` when the file is read and compiled without an exception.
        Otherwise an ``ErrorRecord``:

        * ``category="syntax"`` when a ``SyntaxError`` (or a subclass of it)
          is raised; ``lineno``, ``offset``, ``msg`` and the offending line
          are copied from the exception, and ``caret`` marks the column.
        * ``category="non-syntax"`` for any other ``Exception`` raised while
          reading or compiling (I/O, decoding, permission, ...).

    The function itself does not raise for ordinary failures; they are all
    converted into records.
    """
    try:
        with tokenize.open(path) as f:  # respects source encoding (PEP 263)
            src = f.read()
        # dont_inherit=True: judge the file by its own __future__ imports only,
        # not by any that happen to be active in the code calling compile().
        compile(src, str(path), "exec", dont_inherit=True)
        return None
    except SyntaxError as e:
        offending_line = e.text.rstrip("\n") if e.text else ""
        caret_line = ""
        if e.text and e.offset and e.offset >= 1:
            width = e.offset - 1
            prefix = offending_line[:width]
            # Reuse the source line's tabs in the padding so the caret stays
            # under the right column when `text` and `caret` are printed on
            # consecutive lines; everything else becomes a space.
            padding = "".join("\t" if ch == "\t" else " " for ch in prefix)
            # The offset may point past the end of the stripped line; fill
            # the remainder with spaces so the caret width is still offset-1.
            caret_line = padding + " " * (width - len(prefix)) + "^"
        return ErrorRecord(
            file=str(path),
            lineno=e.lineno,
            offset=e.offset,
            msg=e.msg,
            text=offending_line,
            caret=caret_line,
            category="syntax",
            error_type=type(e).__name__,
        )
    except Exception as e:
        # Non-syntax issues (I/O, decoding, permission, etc.)
        return ErrorRecord(
            file=str(path),
            lineno=None,
            offset=None,
            msg=f"{type(e).__name__}: {e}",
            text="",
            caret="",
            category="non-syntax",
            error_type=type(e).__name__,
        )

# Printed once the definitions above in this cell have executed.
print("Utilities ready")


In [None]:
# Stop here on a mistyped root: is_dir() is False for a missing path as well
# as for a path that exists but is not a directory.
assert ROOT_DIR.is_dir(), f"Not a directory: {ROOT_DIR}"

paths = iter_python_files(ROOT_DIR, EXTENSIONS, EXCLUDE_DIRS)
print(f"Discovered {len(paths)} Python file(s) under {ROOT_DIR}")

# compile_file returns None for files without a problem; keep only real findings.
records: List[ErrorRecord] = [
    found for found in map(compile_file, paths) if found is not None
]

print(f"Collected {len(records)} issue(s)")


In [None]:
# One row per ErrorRecord. Passing `columns=` fixes the column order and also
# yields a correctly shaped empty frame when `records` is empty, so no separate
# branch is needed for the "nothing found" case.
df = pd.DataFrame(
    [
        {
            "file": r.file,
            "lineno": r.lineno,
            "offset": r.offset,
            "message": r.msg,
            "text": r.text,
            "caret": r.caret,
            "category": r.category,
            "error_type": r.error_type,
        }
        for r in records
    ],
    columns=[
        "file", "lineno", "offset", "message", "text", "caret", "category", "error_type"
    ],
).astype(
    # lineno/offset are None for some records. Without a nullable integer dtype
    # pandas stores an int/None mix as float64, so line 12 would be shown and
    # exported as 12.0.
    {"lineno": "Int64", "offset": "Int64"}
)

df


In [None]:
# Headline numbers for the scan. On an empty report the comparisons produce
# empty boolean columns whose sum is 0, so both category counts are 0 there.
summary = dict(
    scanned_files=len(paths),
    issues_found=len(df),
    syntax_errors=int(df["category"].eq("syntax").sum()),
    non_syntax_issues=int(df["category"].eq("non-syntax").sum()),
)
summary


In [None]:
# A second-resolution timestamp in the file names keeps runs made at different
# times in separate files.
timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
csv_path = OUTPUT_DIR.joinpath(f"syntax_errors_{timestamp}.csv")
json_path = OUTPUT_DIR.joinpath(f"syntax_errors_{timestamp}.json")

# Same table in both formats: CSV without the index column, JSON as a list of
# row objects.
df.to_csv(csv_path, index=False)
df.to_json(json_path, indent=2, orient="records")

# Report absolute locations so the files are easy to find from the notebook.
print("Saved:")
print(" - CSV :", csv_path.resolve())
print(" - JSON:", json_path.resolve())
