In [1]:
import sqlite3
from pathlib import Path
from collections import defaultdict, OrderedDict
from typing import Dict, Iterable, Optional, Tuple

# Human-readable names for the known app codes. Insertion order matters: the
# report lists known apps in exactly this order, before any unknown codes.
# NOTE(review): the "A"/"I" prefixes presumably mean Android/iOS -- confirm.
APP_NAME_PLAIN = OrderedDict(
    {
        "A1": "WhatsApp",
        "A2": "Snapchat",
        "A3": "Telegram",
        "A4": "Google Maps",
        "A5": "Samsung Internet",
        "I1": "WhatsApp",
        "I2": "Contacts",
        "I3": "Apple Messages",
        "I4": "Safari",
        "I5": "Calendar",
    }
)

# Default glob patterns used to find SQLite database files in a folder.
PATTERNS = ("*.db", "*.sqlite", "*.sqlitedb", "*.sqlite3")


def get_app_code_from_filename(db_file: Path) -> str:
    """Return the app code found at the start of a database filename.

    The code is the part of the filename stem before the first underscore.
    If the stem has no underscore, it is the part before the first hyphen.
    If the stem has neither separator, the whole stem is returned.
    """
    name = db_file.stem
    # Underscore is checked first, so it wins even when a hyphen appears
    # earlier in the name.
    for separator in ("_", "-"):
        prefix, found, _rest = name.partition(separator)
        if found:
            return prefix
    return name


def count_columns_in_db(db_path: Path) -> int:
    """Return the total number of columns across all tables of a SQLite file.

    Every ``sqlite_master`` entry of type ``'table'`` is inspected with
    ``PRAGMA table_info`` and the per-table column counts are summed.

    Errors are reported as ``[WARN]`` lines on stdout instead of being raised:

    - If the database cannot be opened or its table list cannot be read,
      0 is returned.
    - If a single table cannot be inspected (for example a virtual table
      whose tokenizer or module is not available in this SQLite build),
      only that table is skipped; the remaining tables are still counted.
    """
    conn: Optional[sqlite3.Connection] = None
    total_cols = 0
    try:
        conn = sqlite3.connect(str(db_path))
        cur = conn.cursor()

        cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = [row[0] for row in cur.fetchall()]

        for table_name in tables:
            # PRAGMA arguments cannot be bound as parameters, so embed the
            # name as a string literal with embedded single quotes doubled.
            quoted = table_name.replace("'", "''")
            try:
                cur.execute(f"PRAGMA table_info('{quoted}');")
                columns = cur.fetchall()
            except sqlite3.Error as exc:
                # One unreadable table must not zero out the whole database.
                print(f"[WARN] {db_path}: skipped table {table_name!r}: {exc}")
                continue
            total_cols += len(columns)

    except sqlite3.Error as e:
        print(f"[WARN] {db_path}: {e}")
        return 0

    finally:
        if conn is not None:
            conn.close()

    return total_cols


def iter_db_files(in_dir: Path, patterns: Iterable[str]) -> Iterable[Path]:
    """Lazily yield paths in ``in_dir`` that match any of the glob patterns.

    A file matched by more than one pattern (or reached through different
    paths that resolve to the same location) is yielded only once. The path
    yielded is the one produced by ``glob``, not the resolved form.
    """
    visited = set()
    matches = (hit for pattern in patterns for hit in in_dir.glob(pattern))
    for candidate in matches:
        canonical = candidate.resolve()
        if canonical not in visited:
            visited.add(canonical)
            yield candidate


def write_app_column_totals(
    in_dir: str | Path,
    out_csv: str | Path,
    patterns: Tuple[str, ...] = PATTERNS,
) -> Path:
    """
    Sum the column counts of all databases in a folder, per app, into a CSV.

    Each database file is assigned to an app by its filename prefix, and the
    column counts of all files sharing a prefix are added together. The CSV
    has strict columns (easy to parse later):

      app_code,app_name,total_columns

    Args:
        in_dir: Folder searched (non-recursively) for database files.
        out_csv: Destination CSV path; missing parent folders are created.
        patterns: Glob patterns used to select database files.

    Returns:
        The path of the CSV that was written.

    Raises:
        FileNotFoundError: If ``in_dir`` does not exist. Nothing is created
            on disk in that case.

    Notes:
    - Rows for codes listed in APP_NAME_PLAIN come first, in that mapping's
      order, followed by any other codes in sorted order.
    - Codes missing from APP_NAME_PLAIN use the code itself as app_name.
    - app_code and app_name are quoted if they contain commas, quotes or
      line breaks.
    - total_columns is an integer.
    - If no database file matches, only the header line is written.
    """
    in_dir = Path(in_dir)
    out_csv = Path(out_csv)

    # Validate the input before touching the filesystem, so a bad input path
    # does not leave a stray output directory behind.
    if not in_dir.exists():
        raise FileNotFoundError(f"Input folder not found: {in_dir.resolve()}")

    out_csv.parent.mkdir(parents=True, exist_ok=True)

    totals_by_app: Dict[str, int] = defaultdict(int)
    for fp in sorted(iter_db_files(in_dir, patterns)):
        totals_by_app[get_app_code_from_filename(fp)] += count_columns_in_db(fp)

    # Known apps keep their configured order; unknown codes follow, sorted.
    known_codes = [code for code in APP_NAME_PLAIN if code in totals_by_app]
    other_codes = sorted(code for code in totals_by_app if code not in APP_NAME_PLAIN)

    def csv_escape(s: str) -> str:
        # Minimal CSV escaping: wrap in quotes (doubling embedded quotes)
        # only when the field contains a delimiter, quote or line break.
        if any(ch in s for ch in [",", '"', "\n", "\r"]):
            return '"' + s.replace('"', '""') + '"'
        return s

    lines = ["app_code,app_name,total_columns"]
    for app_code in known_codes + other_codes:
        app_name = APP_NAME_PLAIN.get(app_code, app_code)
        lines.append(f"{csv_escape(app_code)},{csv_escape(app_name)},{totals_by_app[app_code]}")

    out_csv.write_text("\n".join(lines) + "\n", encoding="utf-8")
    return out_csv


# Script entry point: scan one folder of databases and write the per-app
# column totals to a CSV in the current working directory.
if __name__ == "__main__":
    # Input folder, relative to the current working directory -- change to
    # your folder. NOTE: the backslashes make this a Windows-style path; on
    # POSIX systems they are not treated as path separators.
    IN_DIR = Path(r"..\..\selectedDBs")
    # Output CSV, created in the current working directory.
    OUT_CSV = Path("app_total_columns.csv")

    out = write_app_column_totals(IN_DIR, OUT_CSV, patterns=PATTERNS)
    # Print the absolute location so the result is easy to find.
    print(f"Wrote: {out.resolve()}")


[WARN] ..\..\selectedDBs\I2_AddressBook.sqlitedb: unknown tokenizer: ab_cf_tokenizer
Wrote: I:\project2026\llmagent\RQs\RQ2\app_total_columns.csv
