In [1]:
import statsapi
from datetime import date, timedelta

# Query date: one calendar day before today's date, as an ISO "YYYY-MM-DD" string.
target_day = date.today() - timedelta(days=1)
yesterday = target_day.isoformat()

# Fetch the raw "schedule" endpoint payload for that single date.
# NOTE(review): sportId 1 is presumably MLB — confirm against the API docs.
schedule_params = {"sportId": 1, "date": yesterday}
schedule_data = statsapi.get("schedule", schedule_params)

# Quick structural look at the response before drilling into it.
print("Type:", type(schedule_data))         # dict
print("Top-level keys:", list(schedule_data))


Type: <class 'dict'>
Top-level keys: ['copyright', 'totalItems', 'totalEvents', 'totalGames', 'totalGamesInProgress', 'dates']


In [2]:
# The payload groups games under "dates": one bucket per calendar date.
dates = schedule_data.get("dates", [])
print("Number of date buckets:", len(dates))

# Peek at the first bucket (and its first game) to learn the nested layout.
# Both levels are skipped when the corresponding list is empty.
if dates:
    first_bucket = dates[0]
    print("Date bucket keys:", list(first_bucket))

    games = first_bucket.get("games", [])
    print("Number of games on this date:", len(games))

    if games:
        first_game = games[0]
        print("First game keys:", list(first_game))


Number of date buckets: 1
Date bucket keys: ['date', 'totalItems', 'totalEvents', 'totalGames', 'totalGamesInProgress', 'games', 'events']
Number of games on this date: 15
First game keys: ['gamePk', 'gameGuid', 'link', 'gameType', 'season', 'gameDate', 'officialDate', 'status', 'teams', 'venue', 'content', 'isTie', 'gameNumber', 'publicFacing', 'doubleHeader', 'gamedayType', 'tiebreaker', 'calendarEventID', 'seasonDisplay', 'dayNight', 'scheduledInnings', 'reverseHomeAwayStatus', 'inningBreakLength', 'gamesInSeries', 'seriesGameNumber', 'seriesDescription', 'recordSource', 'ifNecessary', 'ifNecessaryDescription']


In [3]:
def _flatten_game(game):
    """Project one raw schedule game dict onto a flat record of scalar fields.

    Nested sections ("status", "teams", "venue") that are missing or None are
    treated as empty dicts, so the matching output fields come back as None
    instead of raising.
    """
    status_info = game.get("status", {}) or {}
    sides = game.get("teams", {}) or {}
    home_team = (sides.get("home") or {}).get("team") or {}
    away_team = (sides.get("away") or {}).get("team") or {}
    venue_info = game.get("venue", {}) or {}

    # Key order here becomes the column order of any DataFrame built from it.
    return {
        "gamePk": game.get("gamePk"),
        "officialDate": game.get("officialDate"),
        "gameDate_utc": game.get("gameDate"),
        "status_code": status_info.get("statusCode"),
        "status_detailed": status_info.get("detailedState"),
        "gameType": game.get("gameType"),
        "seriesGameNumber": game.get("seriesGameNumber"),
        "seriesDescription": game.get("seriesDescription"),
        "doubleHeader": game.get("doubleHeader"),
        "dayNight": game.get("dayNight"),
        "scheduledInnings": game.get("scheduledInnings"),

        "home_team_id": home_team.get("id"),
        "home_team_name": home_team.get("name"),
        "away_team_id": away_team.get("id"),
        "away_team_name": away_team.get("name"),

        "venue_id": venue_info.get("id"),
        "venue_name": venue_info.get("name"),
    }


# One flat record per game, walking every date bucket in order.
clean_games = [
    _flatten_game(game)
    for bucket in dates
    for game in bucket.get("games", [])
]

# Report the count and preview at most the first three records.
print(f"Built {len(clean_games)} game records")
for preview in clean_games[:3]:
    print(preview)


Built 15 game records
{'gamePk': 776428, 'officialDate': '2025-09-07', 'gameDate_utc': '2025-09-07T16:05:00Z', 'status_code': 'F', 'status_detailed': 'Final', 'gameType': 'R', 'seriesGameNumber': 3, 'seriesDescription': 'Regular Season', 'doubleHeader': 'N', 'dayNight': 'day', 'scheduledInnings': 9, 'home_team_id': 144, 'home_team_name': 'Atlanta Braves', 'away_team_id': 136, 'away_team_name': 'Seattle Mariners', 'venue_id': 4705, 'venue_name': 'Truist Park'}
{'gamePk': 776421, 'officialDate': '2025-09-07', 'gameDate_utc': '2025-09-07T16:10:00Z', 'status_code': 'F', 'status_detailed': 'Final', 'gameType': 'R', 'seriesGameNumber': 4, 'seriesDescription': 'Regular Season', 'doubleHeader': 'N', 'dayNight': 'day', 'scheduledInnings': 9, 'home_team_id': 139, 'home_team_name': 'Tampa Bay Rays', 'away_team_id': 114, 'away_team_name': 'Cleveland Guardians', 'venue_id': 2523, 'venue_name': 'George M. Steinbrenner Field'}
{'gamePk': 776427, 'officialDate': '2025-09-07', 'gameDate_utc': '2025-09-

In [4]:
import pandas as pd

# Tabulate the flattened game records: one row per game, one column per key.
df_schedule = pd.DataFrame(data=clean_games)
print("Shape:", df_schedule.shape)

# Trailing expression so the notebook renders the first ten rows as a table.
df_schedule.head(n=10)


Shape: (15, 17)


Unnamed: 0,gamePk,officialDate,gameDate_utc,status_code,status_detailed,gameType,seriesGameNumber,seriesDescription,doubleHeader,dayNight,scheduledInnings,home_team_id,home_team_name,away_team_id,away_team_name,venue_id,venue_name
0,776428,2025-09-07,2025-09-07T16:05:00Z,F,Final,R,3,Regular Season,N,day,9,144,Atlanta Braves,136,Seattle Mariners,4705,Truist Park
1,776421,2025-09-07,2025-09-07T16:10:00Z,F,Final,R,4,Regular Season,N,day,9,139,Tampa Bay Rays,114,Cleveland Guardians,2523,George M. Steinbrenner Field
2,776427,2025-09-07,2025-09-07T17:35:00Z,F,Final,R,3,Regular Season,N,day,9,110,Baltimore Orioles,119,Los Angeles Dodgers,2,Oriole Park at Camden Yards
3,776423,2025-09-07,2025-09-07T17:35:00Z,F,Final,R,3,Regular Season,N,day,9,147,New York Yankees,141,Toronto Blue Jays,3313,Yankee Stadium
4,776419,2025-09-07,2025-09-07T17:35:00Z,F,Final,R,3,Regular Season,N,day,9,134,Pittsburgh Pirates,158,Milwaukee Brewers,31,PNC Park
5,776429,2025-09-07,2025-09-07T17:40:00Z,F,Final,R,3,Regular Season,N,day,9,113,Cincinnati Reds,121,New York Mets,2602,Great American Ball Park
6,776425,2025-09-07,2025-09-07T17:40:00Z,F,Final,R,3,Regular Season,N,day,9,116,Detroit Tigers,145,Chicago White Sox,2394,Comerica Park
7,776426,2025-09-07,2025-09-07T17:40:00Z,F,Final,R,3,Regular Season,N,day,9,146,Miami Marlins,143,Philadelphia Phillies,4169,loanDepot park
8,776422,2025-09-07,2025-09-07T18:10:00Z,F,Final,R,3,Regular Season,N,day,9,118,Kansas City Royals,142,Minnesota Twins,7,Kauffman Stadium
9,776416,2025-09-07,2025-09-07T18:15:00Z,F,Final,R,3,Regular Season,N,day,9,138,St. Louis Cardinals,137,San Francisco Giants,2889,Busch Stadium


In [5]:
import sqlite3

# Persist the schedule DataFrame to the project's root SQLite database.
# Point to the root db file (outside MLB Api Endpoints)
# NOTE(review): absolute, machine-specific path — consider moving it to a
# shared config cell / environment variable so the notebook runs elsewhere.
db_path = r"C:\Users\sneal\Dev\mlb-data-pipeline\mlb_data.db"

# Single source of truth for the table name, shared by the write and the
# success message so the two cannot drift apart.
table_name = "schedule_silver"

conn = sqlite3.connect(db_path)
try:
    # if_exists="replace" drops any existing table of this name and recreates
    # it from the DataFrame; index=False keeps the pandas index out of the table.
    df_schedule.to_sql(table_name, conn, if_exists="replace", index=False)
finally:
    # Always release the database handle, even if the write raises.
    conn.close()

# Only reached when the write succeeded (an exception above propagates).
print(f"✅ {table_name} written to root mlb_data.db")


✅ teams_silver written to root mlb_data.db
