In [None]:
"""
   This is our messy script
"""
# Goal (eventually): read a JSON list of records, simulate some simple analysis and print a summary.
# At the moment the file is full of noise: everything is wrong on purpose.

import json, os, sys, time, random, datetime as dt
from pathlib import Path

# REVIEW: module-level state that the functions below read and mutate.
#   "pth"   path of the JSON input file (Windows-style separator hard-coded)
#   "ENC"   encoding used when reading that file
#   "thres" minimum value a record needs to be kept
#   "mode"  "OK" keeps only ok-status records; "ALL" makes filtOk skip the status check
CONFIG = {"pth":"data\\sample.json", "ENC":"utf8", "thres":0, "mode":"OK"}  # mutates at runtime
cache = {}     # global cache for… no reason
DATA = None    # will be populated on import (side-effect)

# side-effect on import (don’t do this)
# REVIEW: the bare `except:` turns every failure (missing file, invalid JSON,
# wrong encoding, ...) into the same hard-coded fallback records plus a print.
try:
    with open(CONFIG["pth"], "r", encoding=CONFIG["ENC"]) as f:
        DATA = json.loads(f.read())
except:
    DATA = [{"STATUS":"ok","value":"3"}, {"STATUS":"bad","value": "x"}, {"STATUS":"ok","value":7}]
    print("could not read file, using backup data!!!")  # noisy import

# random environment “feature”
# REVIEW: about one import in ten switches the filter mode to "ALL",
# so two runs on identical input can produce different results.
if random.random() < 0.1:
    CONFIG["mode"] = "ALL"

# inconsistent naming + magic numbers
def filtOk(items, THRESH=CONFIG["thres"]):
    # REVIEW: returns a new list of {"status", "value"} dicts for the entries of
    # `items` whose lower-cased status is "ok" (any status when CONFIG["mode"]
    # is "ALL") and whose value is >= THRESH.
    # The THRESH default is read from CONFIG once, when this `def` executes;
    # later changes to CONFIG["thres"] do not reach it.
    out=[]
    for i in range(0,len(items)):
        try:
            # a missing "value" key counts as 0
            v=items[i].get("value",0)
            if type(v) is str:
                # strings that int() rejects (e.g. "x", "2.5") raise and the
                # record is dropped by the bare except below
                v=int(v)  # maybe…
            # "status" takes precedence over "STATUS"; "??" when neither key exists
            s=items[i].get("status", items[i].get("STATUS","??")).lower()
            if (s=="ok" or CONFIG["mode"]=="ALL") and v>=THRESH:
                out.append({"status":s,"value":v})
        except:
            # bare except: any error raised for this record silently skips it
            pass
    return out

# duplicated-but-different function doing almost the same
def filter_good(x, threshold=0):
    # REVIEW: near-duplicate of filtOk with different rules. Returns a new list
    # of {"status", "value"} dicts for entries whose lower-cased status starts
    # with "o" and whose value is not below `threshold`.
    # Differences visible here: values are parsed with float() instead of int(),
    # unparsable strings become 0 instead of dropping the record, the missing
    # status default is "BAD", and CONFIG["mode"] is never consulted.
    r=[]
    i=0
    while i<len(x):
        z=x[i]
        try:
            vv=z.get("value", None)
            # missing or None value counts as 0
            if vv is None: vv=0
            if isinstance(vv,str):
                try: vv=float(vv)
                except: vv=0
            st=(z["status"] if "status" in z else z.get("STATUS","BAD")).lower()
            # startswith("o") accepts any status beginning with "o", not only "ok"
            if st.startswith("o") and not (vv<threshold):
                r.append({"status":st,"value":vv})
        except:
            # swallow everything
            ...
        i+=1
    return r

# mutable default arg + hidden I/O + side effects
def compute(items, acc=[]):
    # REVIEW: returns {"count", "sum", "avg"} for `items`.
    # "count" is len(items), but "sum" and "avg" are computed over `acc`.
    # `acc` defaults to one list created when the `def` executes, so calls that
    # omit it keep appending to the same list and results grow across calls.
    # fake “config reload”
    # hidden I/O: merges config.json from the current directory into CONFIG
    if os.path.exists("config.json"):
        try:
            CONFIG.update(json.loads(Path("config.json").read_text()))
        except:
            print("config broken, ignoring")
    for it in items:
        # a missing "value" key contributes 0
        acc.append(it.get("value",0))
    # pointless sleep
    time.sleep(0.01)
    s = sum(acc)  # includes values from previous runs! 🙃
    # guard against dividing by zero when nothing has been accumulated
    avg = (s / len(acc)) if acc else 0
    return {"count":len(items),"sum":s,"avg":avg}

# dead code but still executed
def do_everything_and_nothing(records):
    # REVIEW: the only effect on the data is the last line, which returns a new
    # list containing the truthy entries of `records`. Everything before it is
    # a side effect unrelated to the result.
    # eval for config tweaks (yikes)
    # SECURITY: evaluates whatever the TWEAK environment variable contains,
    # i.e. arbitrary code execution; the value of the expression is discarded.
    if os.environ.get("TWEAK"):
        try:
            eval(os.environ["TWEAK"])
        except Exception as e:
            print("tweak failed:", e)
    # dynamic import for vibes
    # the imported module object is not kept
    try:
        __import__("math")
    except:
        pass
    # platform-specific nonsense
    # spawns a shell to list the current directory and throws the output away
    if sys.platform.startswith("win"):
        os.system("dir > NUL")  # no-op
    else:
        os.system("ls >/dev/null 2>&1")
    # overwrite global cache unpredictably
    # stores the current local time as an ISO-8601 string under "last"
    cache["last"] = dt.datetime.now().isoformat()
    return [r for r in records if r]  # ¯\_(ツ)_/¯

# weird CLI-ish behavior embedded in logic
def main(argv=None):
    # REVIEW: parses `argv` (defaults to sys.argv[1:]), filters the global DATA,
    # computes statistics and prints one summary line. Returns None; the only
    # observable results are the printed line and the mutated globals.
    if argv is None: argv = sys.argv[1:]
    # ad-hoc arg parsing
    if "--file" in argv:
        try:
            # "--file" as the last argument raises IndexError here; the bare
            # except below reports it with the same message as a read failure
            p = argv[argv.index("--file")+1]
            CONFIG["pth"] = p
            # slice assignment replaces the contents of the module-level DATA in place
            DATA[:] = json.loads(Path(p).read_text(encoding=CONFIG["ENC"]))
        except:
            print("could not load file from args, continuing with DATA…")
    if "--all" in argv:
        CONFIG["mode"] = "ALL"
    if "--thres" in argv:
        try:
            CONFIG["thres"] = int(argv[argv.index("--thres")+1])
        except:
            print("bad thres, keeping", CONFIG["thres"])
    # choose a random filter path to spice things up
    # both filters run on every call; random.choice then keeps one of the two results
    records = random.choice([filtOk(DATA, CONFIG["thres"]), filter_good(DATA, CONFIG["thres"])])
    records = do_everything_and_nothing(records)
    res = compute(records)  # uses sticky mutable default!
    # mixed concerns: formatting, logic, printing here
    stamp = dt.datetime.now().strftime("%Y/%m/%d-%H:%M:%S")
    print(f"[{stamp}] ok_count={res['count']} total_value={res['sum']} avg={res['avg']:.2f}")
    # return nothing (hard to test)
    # also mutate global CONFIG for no reason
    CONFIG["last_avg"] = res["avg"]

# “library” code runs on import AND when executed
# REVIEW: runs main() when the file is executed as a script. The
# `or random.random() < 0.05` arm also makes about 5% of plain imports run it.
if __name__ == "__main__" or random.random() < 0.05:
    main()

In [None]:
"""
   What is wrong with this code?
"""
"""
1. Architecture / determinism
   - Import-time side effects: reading files, printing, mutating CONFIG,
     and randomness (random.random() < 0.1) all happen at import; this is
     poor design and makes testing difficult.
   - Global variables are everywhere: CONFIG, DATA, cache are mutated
     from multiple places - this hidden coupling invites subtle bugs.
   - Random behavior in logic: the random filter choice and random mode
     toggle make results non-reproducible and hard to test.

2. APIs / testability
   - main() prints but returns nothing: can’t test results without capturing
     stdout; better to return a value and let the caller format/print.
   - Hidden I/O: compute() reads config.json and sleeps
   - Mutable default arg

3. Error handling & safety
   - Bare `except:` everywhere hides real bugs and makes debugging
     nearly impossible.
   - eval() on the TWEAK environment variable executes arbitrary code.
   - os.system(...) noise with no value; platform branches do nothing.

4. Data handling
   - Duplicate “almost the same” functions: filtOk vs filter_good diverge in
     details (int vs float, default status, thresholds), inconsistent behavior.
   - Inconsistent keys: flip between STATUS and status; ad-hoc casing.
   - Magic numbers / strings: "ALL", "ok", "??", "BAD" are scattered through
     the code instead of being named constants.

5. CLI & user experience
   - Ad-hoc arg parsing: manual argv indexing and bare excepts.
   - Path/encoding: hardcoded Windows-style path in CONFIG["pth"].

6. Style & maintenance
   - No typing, no docstrings, inconsistent naming, sleeps, dead code
     (do_everything_and_nothing).
"""

In [None]:
"""
   A way to fix it, basic, can be extended!
"""
"""
1. Make it deterministic & side-effect free on import
  - Move all I/O and randomness into main() (or a run() function).
  - Remove the random mode flip and the random filter choice; make the mode
    and the filter explicit parameters instead.

2. Replace globals with explicit configuration
  - Define an immutable Config and pass it down.
  - If you must have a mutable config, scope it to main().

3. One filter, one truth
  - Write a single filter_records(records, *, threshold, mode) with consistent
    key handling (accept both STATUS and status), casefold the status, and
    parse the value robustly (int/float).
  - Return a new list; no mutation.

4. Proper CLI and logging
  - Use argparse for --file, --threshold, --mode {OK,ALL}; validate inputs.
  - Use logging for messages; no prints inside libraries.

5. Better error handling & security
  - Catch specific exceptions (FileNotFoundError, JSONDecodeError),
    surface helpful messages.
  - Remove import-time code and the random trigger next to the
    if __name__ == "__main__" guard; drop eval() and os.system() entirely.

6. Add types, tests, and formatting
  - Type hints.
  - Unit tests for: value parsing, filtering logic, compute stats, CLI.
"""

In [None]:
"""
   Examples
"""

In [None]:
"""
   Package structure - an example; also add a test data folder including
   a script to generate synthetic data and a sample json file
"""
"""
pite_project1/
├─ pyproject.toml
├─ README.md
├─ LICENSE
├─ src/
│  └─ pite_project1/
│     ├─ __init__.py
│     ├─ cli.py
│     ├─ core.py
│     └─ io_.py
└─ tests/
   ├─ conftest.py
   ├─ test_core.py
   └─ test_cli.py
"""

In [None]:
"""
   Example toml file
"""
"""
## pyproject.toml

[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "acme-reporter"
version = "0.1.0"
description = "Mini reporting tool (class project)"
readme = "README.md"
requires-python = ">=3.10"
authors = [{ name = "Student", email = "student@example.com" }]
license = { text = "MIT" }
dependencies = [
  "typer>=0.12",
]

[project.scripts]
acme-reporter = "pite_project1.cli:app"

[tool.pytest.ini_options]
addopts = "-q"
pythonpath = ["src"]
"""

In [None]:
"""
   Separate I/O module implementation
"""

from __future__ import annotations
from pathlib import Path
import json
from typing import Any

Record = dict[str, Any]


def load_json(path: str | Path, encoding: str = "utf-8") -> list[Record]:
    """Load a list of JSON records from *path*.

    Pure function apart from file I/O. Validates top-level type.
    """
    data = json.loads(Path(path).read_text(encoding=encoding))
    if not isinstance(data, list):
        raise ValueError("Expected a JSON list of records")
    return data


def dump_json(path: str | Path, records: list[Record], encoding: str = "utf-8") -> None:
    """Serialise *records* as JSON and write the text to *path*.

    The output uses a two-space indent and keeps non-ASCII characters
    unescaped; the file is written with *encoding*.
    """
    serialized = json.dumps(records, indent=2, ensure_ascii=False)
    destination = Path(path)
    destination.write_text(serialized, encoding=encoding)


In [None]:
"""
   Example of tidy and consistent filter
"""

def filter_ok(records: Iterable[dict], *, threshold: float = 0) -> list[dict]:
    """Return records with status=="ok" and value >= threshold.

    - Case-insensitive status handling, tolerates {"STATUS": ...} as well.
    - Ignores malformed records gracefully without side effects.
    """
    out: list[dict] = []
    for r in records:
        if not isinstance(r, dict):
            continue
        status = r.get("status", r.get("STATUS", "")).lower()
        if status == "ok":
            val = _coerce_value(r.get("value", 0))
            if val >= threshold:
                out.append({"status": "ok", "value": val})
    return out

In [None]:
"""
   CI example file
"""
## .github/workflows/ci.yml (GitHub Actions)

# Continuous integration: tests, type check, lint and format check on every
# push or pull request that targets main or master.
name: CI

on:
  push:
    branches: [ main, master ]
  pull_request:
    branches: [ main, master ]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      # let every Python version finish even if one of them fails
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12"]

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      - name: Install project (editable) and tools
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e .
          python -m pip install pytest mypy ruff black pre-commit

      - name: Run unit tests
        run: pytest -q

      - name: Type check (mypy)
        run: mypy src

      - name: Lint (ruff)
        run: ruff check src tests

      - name: Format check (black)
        # Same line length as the black hook in .pre-commit-config.yaml;
        # black's default (88) would reject code that hook formatted to 100.
        run: black --check --line-length=100 src tests

      - name: Pre-commit (meta hooks)
        run: pre-commit run --all-files


In [None]:
"""
   Pre-commit hooks
"""
## .pre-commit-config.yaml (Pre-commit hooks)
# Hooks run in the order listed below.
repos:
  # General file-hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:

      - id: trailing-whitespace        # removes stray spaces at line ends
      - id: end-of-file-fixer          # ensures files end with a single newline
      - id: check-merge-conflict       # rejects <<< >>> conflict markers
      - id: check-yaml                 # validates .yml/.yaml syntax
      - id: check-toml                 # validates .toml syntax
      - id: detect-private-key         # blocks committing private keys

  # Code formatter; line length set to 100 here.
  - repo: https://github.com/psf/black
    rev: 24.8.0
    hooks:
      - id: black
        args: ["--line-length=100"]

  # Linter; --fix lets ruff apply its automatic fixes to the files.
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.6.8
    hooks:
      - id: ruff
        args: ["--fix"]

  # Static type checking in strict mode.
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.11.2
    hooks:
      - id: mypy
        # The hook runs in its own isolated environment, so the project's
        # runtime dependency (typer) must be listed for mypy to resolve it.
        additional_dependencies: ["types-requests", "typer>=0.12"]
        args: ["--strict", "src"]
        # "src" is passed explicitly above; do not also append staged filenames.
        pass_filenames: false

In [None]:
"""
   Dessert
   https://www.youtube.com/watch?v=e9lnsKot_SQ
"""