In [2]:
# STEP 1 — Explore /schedule endpoint with required parameters

import requests
from datetime import date, timedelta

url = "https://statsapi.mlb.com/api/v1/schedule"

# Seconds to wait for the server before giving up. Without a timeout,
# requests will wait forever on a stalled connection and hang the kernel.
REQUEST_TIMEOUT_S = 10

# Use yesterday's date (today minus one day), formatted as YYYY-MM-DD
yesterday = (date.today() - timedelta(days=1)).isoformat()

params = {
    "sportId": 1,           # 1 = MLB
    "date": yesterday       # or use startDate / endDate if you want a range
}

response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_S)

# Print the diagnostics first so they are visible even when the request failed
print("HTTP status:", response.status_code)
print("Requested URL:", response.url)

# Fail here on 4xx/5xx instead of letting later cells parse an error body
response.raise_for_status()


HTTP status: 200
Requested URL: https://statsapi.mlb.com/api/v1/schedule?sportId=1&date=2025-09-04


In [3]:
# STEP 2 — Decode the response body as JSON and take a first look
#
# Only the container type and the root keys are printed here; the payload
# itself is not dumped.

data = response.json()   # body parsed from JSON into Python objects

print("Top-level type:", type(data))
print("Top-level keys:", list(data.keys()))


Top-level type: <class 'dict'>
Top-level keys: ['copyright', 'totalItems', 'totalEvents', 'totalGames', 'totalGamesInProgress', 'dates']


In [4]:
# STEP 3 — Drill into the schedule payload: root -> date buckets -> games
#
# Prints only keys and counts at each level, never the records themselves.

sched = response.json()

# Keys available at the root of the payload
print("Top-level keys:", list(sched.keys()))

# The games hang off the 'dates' list
dates = sched.get("dates", [])
print("Number of date buckets:", len(dates))

# Descend only while there is something to look at
if len(dates) > 0:
    first_bucket = dates[0]
    print("Date bucket keys:", list(first_bucket.keys()))

    games = first_bucket.get("games", [])
    print("Number of games on this date:", len(games))

    # Use the first game as a sample of the per-game fields
    if len(games) > 0:
        first_game = games[0]
        print("First game keys:", list(first_game.keys()))


Top-level keys: ['copyright', 'totalItems', 'totalEvents', 'totalGames', 'totalGamesInProgress', 'dates']
Number of date buckets: 1
Date bucket keys: ['date', 'totalItems', 'totalEvents', 'totalGames', 'totalGamesInProgress', 'games', 'events']
Number of games on this date: 6
First game keys: ['gamePk', 'gameGuid', 'link', 'gameType', 'season', 'gameDate', 'officialDate', 'status', 'teams', 'venue', 'content', 'isTie', 'gameNumber', 'publicFacing', 'doubleHeader', 'gamedayType', 'tiebreaker', 'calendarEventID', 'seasonDisplay', 'dayNight', 'scheduledInnings', 'reverseHomeAwayStatus', 'inningBreakLength', 'gamesInSeries', 'seriesGameNumber', 'seriesDescription', 'recordSource', 'ifNecessary', 'ifNecessaryDescription']


In [5]:
# STEP — Build a clean list of game records from /schedule
#
# Flattens the nested payload (dates -> games) into one flat dict per game so
# the list can be loaded straight into a DataFrame. Every nested lookup falls
# back to an empty container, so a missing or null section produces None
# fields instead of raising.

# `or []` rather than a .get() default: the default only applies when the key
# is absent, while `or` also covers a key that is present but null.
dates = sched.get("dates") or []
clean_games = []

for bucket in dates:
    for g in bucket.get("games") or []:
        status = g.get("status") or {}
        teams  = g.get("teams") or {}
        # Each side of 'teams' wraps the team details under a nested 'team' key
        home   = (teams.get("home") or {}).get("team") or {}
        away   = (teams.get("away") or {}).get("team") or {}
        venue  = g.get("venue") or {}

        clean_games.append({
            # Game identity and timing
            "gamePk": g.get("gamePk"),
            "officialDate": g.get("officialDate"),          # YYYY-MM-DD (local to game site)
            "gameDate_utc": g.get("gameDate"),              # ISO UTC timestamp
            "status_code": status.get("statusCode"),
            "status_detailed": status.get("detailedState"),
            "gameType": g.get("gameType"),                  # R, S, P, etc.
            "seriesGameNumber": g.get("seriesGameNumber"),
            "seriesDescription": g.get("seriesDescription"),
            "doubleHeader": g.get("doubleHeader"),
            "dayNight": g.get("dayNight"),
            "scheduledInnings": g.get("scheduledInnings"),

            # Teams
            "home_team_id": home.get("id"),
            "home_team_name": home.get("name"),
            "away_team_id": away.get("id"),
            "away_team_name": away.get("name"),

            # Venue
            "venue_id": venue.get("id"),
            "venue_name": venue.get("name"),
        })

# Show the count plus a small sample rather than dumping every record
print(f"Built {len(clean_games)} game records")
for row in clean_games[:3]:
    print(row)


Built 6 game records
{'gamePk': 776459, 'officialDate': '2025-09-04', 'gameDate_utc': '2025-09-04T20:10:00Z', 'status_code': 'F', 'status_detailed': 'Final', 'gameType': 'R', 'seriesGameNumber': 3, 'seriesDescription': 'Regular Season', 'doubleHeader': 'N', 'dayNight': 'day', 'scheduledInnings': 9, 'home_team_id': 158, 'home_team_name': 'Milwaukee Brewers', 'away_team_id': 143, 'away_team_name': 'Philadelphia Phillies', 'venue_id': 32, 'venue_name': 'American Family Field'}
{'gamePk': 776461, 'officialDate': '2025-09-04', 'gameDate_utc': '2025-09-04T22:40:00Z', 'status_code': 'F', 'status_detailed': 'Final', 'gameType': 'R', 'seriesGameNumber': 3, 'seriesDescription': 'Regular Season', 'doubleHeader': 'N', 'dayNight': 'night', 'scheduledInnings': 9, 'home_team_id': 134, 'home_team_name': 'Pittsburgh Pirates', 'away_team_id': 119, 'away_team_name': 'Los Angeles Dodgers', 'venue_id': 31, 'venue_name': 'PNC Park'}
{'gamePk': 776458, 'officialDate': '2025-09-04', 'gameDate_utc': '2025-09-0

In [6]:
# STEP — Convert to DataFrame (this is your /schedule Silver)
#
# One row per game record; the columns come from the keys of each dict in
# clean_games.

import pandas as pd

df_schedule = pd.DataFrame(data=clean_games)
print("Shape:", df_schedule.shape)

# The last expression is rendered as the cell's output: the first 10 rows
df_schedule.head(n=10)


Shape: (6, 17)


Unnamed: 0,gamePk,officialDate,gameDate_utc,status_code,status_detailed,gameType,seriesGameNumber,seriesDescription,doubleHeader,dayNight,scheduledInnings,home_team_id,home_team_name,away_team_id,away_team_name,venue_id,venue_name
0,776459,2025-09-04,2025-09-04T20:10:00Z,F,Final,R,3,Regular Season,N,day,9,158,Milwaukee Brewers,143,Philadelphia Phillies,32,American Family Field
1,776461,2025-09-04,2025-09-04T22:40:00Z,F,Final,R,3,Regular Season,N,night,9,134,Pittsburgh Pirates,119,Los Angeles Dodgers,31,PNC Park
2,776458,2025-09-04,2025-09-04T23:35:00Z,F,Final,R,1,Regular Season,N,night,9,139,Tampa Bay Rays,114,Cleveland Guardians,2523,George M. Steinbrenner Field
3,776463,2025-09-04,2025-09-04T23:40:00Z,F,Final,R,3,Regular Season,N,night,9,118,Kansas City Royals,108,Los Angeles Angels,7,Kauffman Stadium
4,776462,2025-09-04,2025-09-04T23:40:00Z,F,Final,R,4,Regular Season,N,night,9,142,Minnesota Twins,145,Chicago White Sox,3312,Target Field
5,776464,2025-09-04,2025-09-04T23:40:00Z,F,Final,R,3,Regular Season,N,night,9,117,Houston Astros,147,New York Yankees,2392,Daikin Park
