In [None]:
# common.py
from pathlib import Path
import logging
from datetime import datetime, timezone
import sqlite3

# Filesystem layout: the database and the logs directory both live in the
# same directory as this module.
PROJECT_ROOT = Path(__file__).resolve().parent
DB_PATH = PROJECT_ROOT.joinpath("mlb_data.db")

# Logging: configure the root logger once, at import time, so every message
# goes both to logs/pipeline.log and to the console stream.
LOGS_DIR = PROJECT_ROOT.joinpath("logs")
LOGS_DIR.mkdir(exist_ok=True)  # no error if the directory is already there

_file_handler = logging.FileHandler(LOGS_DIR / "pipeline.log", encoding="utf-8")
_console_handler = logging.StreamHandler()

logging.basicConfig(
    handlers=[_file_handler, _console_handler],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Shared named logger used by the pipeline scripts.
log = logging.getLogger("mlb-pipeline")

def utc_now_iso() -> str:
    """Return the current time as a timezone-aware UTC ISO-8601 string."""
    current_utc = datetime.now(tz=timezone.utc)
    return current_utc.isoformat()

def get_conn() -> sqlite3.Connection:
    """Open and return a new SQLite connection to the file at ``DB_PATH``.

    The caller owns the returned connection and is responsible for closing it.
    """
    db_file = DB_PATH.as_posix()
    connection = sqlite3.connect(db_file)
    return connection


In [None]:
# refresh_schedule_silver.py
import statsapi
import pandas as pd
from datetime import date, timedelta, datetime
from common import get_conn, log

def _game_to_row(game: dict, stamp: str) -> dict:
    """Flatten one game entry of the schedule payload into a flat row dict.

    Args:
        game: A single element of a ``games`` list from the schedule response.
        stamp: ISO timestamp written to both ``created_at`` and
            ``last_updated``.

    Returns:
        A dict keyed by the ``schedule_silver`` column names. Fields missing
        from the payload come through as ``None``.
    """
    # ``or {}`` guards against keys that are present but explicitly null.
    status = game.get("status", {}) or {}
    teams = game.get("teams", {}) or {}
    home = (teams.get("home") or {}).get("team") or {}
    away = (teams.get("away") or {}).get("team") or {}
    venue = game.get("venue", {}) or {}

    return {
        "game_pk": game.get("gamePk"),
        "official_date": game.get("officialDate"),
        "game_datetime_utc": game.get("gameDate"),
        "status_code": status.get("statusCode"),
        "status_detailed": status.get("detailedState"),
        "game_type": game.get("gameType"),
        "series_game_number": game.get("seriesGameNumber"),
        "series_description": game.get("seriesDescription"),
        "doubleheader": game.get("doubleHeader"),
        "day_night": game.get("dayNight"),
        "scheduled_innings": game.get("scheduledInnings"),
        "home_team_id": home.get("id"),
        "home_team_name": home.get("name"),
        "away_team_id": away.get("id"),
        "away_team_name": away.get("name"),
        "venue_id": venue.get("id"),
        "venue_name": venue.get("name"),
        "created_at": stamp,
        "last_updated": stamp,
    }


def main():
    """Rebuild the ``schedule_silver`` table for yesterday through tomorrow.

    Fetches the ``schedule`` endpoint through ``statsapi.get`` for a
    three-day window centred on today, flattens every game into one row and
    replaces the ``schedule_silver`` table with the result.

    If the response contains no games the existing table is left untouched:
    a DataFrame built from zero rows has no columns, so it cannot stand in
    for the table that ``if_exists="replace"`` would discard.
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    # Read "today" once so both window edges come from the same calendar day
    # even if the job runs across midnight.
    today = date.today()
    start_date = (today - timedelta(days=1)).isoformat()
    end_date = (today + timedelta(days=1)).isoformat()

    log.info("Refreshing schedule_silver for %s..%s", start_date, end_date)

    data = statsapi.get(
        "schedule",
        {"sportId": 1, "startDate": start_date, "endDate": end_date},
    )

    # One timestamp per refresh run (loop-invariant).
    # NOTE(review): this is naive local time, unlike common.utc_now_iso();
    # kept as-is so the stored format does not change - confirm intent.
    stamp = datetime.now().isoformat()

    rows = []
    for bucket in data.get("dates", []) or []:
        for game in bucket.get("games", []) or []:
            rows.append(_game_to_row(game, stamp))

    df = pd.DataFrame(rows)
    log.info("Built %d rows", len(df))

    if not rows:
        log.warning(
            "No games returned for %s..%s; leaving schedule_silver untouched",
            start_date,
            end_date,
        )
        return

    # ``with conn`` only commits or rolls back; ``closing`` is what actually
    # releases the connection when the block exits.
    with closing(get_conn()) as conn, conn:
        df.to_sql("schedule_silver", conn, if_exists="replace", index=False)

    log.info("✅ schedule_silver refresh complete")

# Run the refresh only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()


In [None]:
# Shell command (not Python): runs the schedule refresh job as a script.
python refresh_schedule_silver.py


In [None]:
# -------------------------
# Utility functions
# -------------------------

def list_tables() -> None:
    """
    Quick helper to print all tables currently in the SQLite database.
    Useful for debugging after refresh jobs.

    Prints the raw rows returned by the ``sqlite_master`` query (a list of
    one-element tuples) and returns nothing.
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    # A sqlite3 connection used as a context manager only commits or rolls
    # back; ``closing`` makes sure the connection is actually released.
    with closing(get_conn()) as conn:
        tables = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table';"
        ).fetchall()
    print("📊 Tables in database:", tables)
