In [1]:
import statsapi
import pandas as pd
import sqlite3
from datetime import date, timedelta, datetime

# --- Step 1: Fetch schedule data ---
# Grab yesterday, today, and tomorrow (so nothing slips through).
# The clock is read once so both edges of the window are anchored to the same
# day, even if this cell happens to run across midnight.
today = date.today()
start_date = (today - timedelta(days=1)).isoformat()
end_date   = (today + timedelta(days=1)).isoformat()

# Request the "schedule" endpoint for the three-day window computed above.
schedule_data = statsapi.get(
    "schedule",
    {"sportId": 1, "startDate": start_date, "endDate": end_date}
)

In [2]:
# --- Step 2: Parse into clean rows ---
# Flatten the nested schedule payload (dates -> games) into one flat dict per
# game, ready to be loaded into a DataFrame.

# Capture the load time once: every row of this run, and both audit columns
# within a row, then share exactly the same value. Naive local time, as before.
load_timestamp = datetime.now()

clean_games = []

# `or []` / `or {}` cover keys that are present but null as well as missing keys.
for bucket in schedule_data.get("dates") or []:
    for g in bucket.get("games") or []:
        status = g.get("status", {}) or {}
        teams  = g.get("teams", {}) or {}
        home   = (teams.get("home") or {}).get("team") or {}
        away   = (teams.get("away") or {}).get("team") or {}
        venue  = g.get("venue", {}) or {}

        clean_games.append({
            "game_pk": g.get("gamePk"),
            "official_date": g.get("officialDate"),
            "game_datetime_utc": g.get("gameDate"),
            "status_code": status.get("statusCode"),
            "status_detailed": status.get("detailedState"),
            "game_type": g.get("gameType"),
            "series_game_number": g.get("seriesGameNumber"),
            "series_description": g.get("seriesDescription"),
            "doubleheader": g.get("doubleHeader"),
            "day_night": g.get("dayNight"),
            "scheduled_innings": g.get("scheduledInnings"),
            "home_team_id": home.get("id"),
            "home_team_name": home.get("name"),
            "away_team_id": away.get("id"),
            "away_team_name": away.get("name"),
            "venue_id": venue.get("id"),
            "venue_name": venue.get("name"),
            "created_at": load_timestamp,
            "last_updated": load_timestamp
        })

print(f"✅ Built {len(clean_games)} schedule records")


✅ Built 41 schedule records


In [3]:
# --- Step 3: Load into DataFrame ---
# One row per parsed game record from Step 2.
df_schedule = pd.DataFrame(data=clean_games)

# Sanity check: frame dimensions, then the first three rows.
print(f"Shape: {df_schedule.shape}")
print(df_schedule.head(n=3))

Shape: (41, 19)
   game_pk official_date     game_datetime_utc status_code status_detailed  \
0   776413    2025-09-08  2025-09-08T22:40:00Z           F           Final   
1   776409    2025-09-08  2025-09-08T22:40:00Z           F           Final   
2   776405    2025-09-08  2025-09-08T22:45:00Z           F           Final   

  game_type  series_game_number series_description doubleheader day_night  \
0         R                   1     Regular Season            N     night   
1         R                   1     Regular Season            N     night   
2         R                   1     Regular Season            N     night   

   scheduled_innings  home_team_id         home_team_name  away_team_id  \
0                  9           114    Cleveland Guardians           118   
1                  9           146          Miami Marlins           120   
2                  9           143  Philadelphia Phillies           121   

         away_team_name  venue_id          venue_name  \
0   

In [4]:
# --- Step 4: Write to SQLite ---
# Replace the schedule_silver table with the rows built in this run.
db_path = r"C:\Users\sneal\Dev\mlb-data-pipeline\mlb_data.db"

if df_schedule.empty:
    # if_exists="replace" drops the existing table before writing, so pushing
    # an empty frame would throw away the previous load. Leave the table alone.
    print("⚠️ schedule_silver not written: no schedule rows to load")
else:
    conn = sqlite3.connect(db_path)
    try:
        df_schedule.to_sql("schedule_silver", conn, if_exists="replace", index=False)
    finally:
        # Always release the database handle, even when the write raises.
        conn.close()

    print(f"✅ schedule_silver written: {len(df_schedule)} rows")

✅ schedule_silver written: 41 rows
