In [None]:
#| default_exp baseball

# Baseball
> Modeling the MLB Stats API

In [None]:
#| export
import datetime as dt
import re
from enum import Enum
from typing import *

import statsapi as mlb
from pydantic import ConfigDict

from humble_chuck.models import BaseModel

In [None]:
def get_season_games(season: int) -> List[dict]:
    """Return every scheduled MLB game dict for one season.

    The season's start and end dates are looked up through the `season`
    endpoint (sportId 1); the `schedule` endpoint is then queried for that
    date window and the per-date game lists are flattened into one list.

    Args:
        season: Season year, sent to the API as `seasonId`.

    Returns:
        The game dicts of the schedule response, in response order.
    """
    season_info = mlb.get('season', {'sportId': 1, 'seasonId': season})['seasons'][0]
    schedule_params = {
        'startDate': season_info['seasonStartDate'],
        'endDate': season_info['seasonEndDate'],
        'sportId': 1,
    }
    schedule = mlb.get(endpoint='schedule', params=schedule_params)

    # Each entry of 'dates' carries its own 'games' list; concatenate them.
    games = []
    for day in schedule['dates']:
        games.extend(day['games'])
    return games

In [None]:
# Pull the full 2024 schedule, then look at which fields a single game dict carries.
games_2024 = get_season_games(2024)
games_2024[-1].keys()

dict_keys(['gamePk', 'gameGuid', 'link', 'gameType', 'season', 'gameDate', 'officialDate', 'status', 'teams', 'venue', 'content', 'isTie', 'gameNumber', 'publicFacing', 'doubleHeader', 'gamedayType', 'tiebreaker', 'calendarEventID', 'seasonDisplay', 'dayNight', 'description', 'scheduledInnings', 'reverseHomeAwayStatus', 'inningBreakLength', 'gamesInSeries', 'seriesGameNumber', 'seriesDescription', 'recordSource', 'ifNecessary', 'ifNecessaryDescription'])

Quick check of the distinct `detailedState` values, to see how we can filter for finished games.

In [None]:
{g['status']['detailedState'] for g in games_2024}

{'Cancelled', 'Completed Early', 'Final', 'Postponed'}

In [None]:
# Keep only games whose detailed state is exactly 'Final'.
# NOTE(review): 'Completed Early' is also among the states returned for this season
# and is dropped by this filter — confirm that excluding those games is intended.
completed_games = [g for g in games_2024 if g['status']['detailedState']=='Final']
len(completed_games)

2935

## Game Response

In [None]:
# Fetch the feed for a single sample game and list its top-level keys.
game = mlb.get('game',{'gamePk':'748266'})
game.keys()

dict_keys(['copyright', 'gamePk', 'link', 'metaData', 'gameData', 'liveData'])

In [None]:
game['metaData']

{'wait': 10,
 'timeStamp': '20240222_231445',
 'gameEvents': ['strikeout', 'game_finished'],
 'logicalEvents': ['midInning',
  'countChange',
  'count03',
  'gameStateChangeToGameOver']}

In [None]:
class GameResponseBase(BaseModel):
    """Top-level identifying fields of a `game` endpoint response."""

    copyright: str  # copyright notice string returned with the response
    gamePk: int  # numeric game id, e.g. 748266
    link: str  # API path of the feed, e.g. /api/v1.1/game/748266/feed/live
    

In [None]:
GameResponseBase.model_validate(game)

0,1
copyright,"Copyright 2025 MLB Advanced Media, L.P. Use of any content on this page acknowledges agreement to the terms posted here http://gdx.mlb.com/components/copyright.txt"
gamePk,748266
link,/api/v1.1/game/748266/feed/live


In [None]:
game['liveData'].keys()

dict_keys(['plays', 'linescore', 'boxscore', 'decisions', 'leaders'])

## Game Data

In [None]:
# Pull out the `gameData` section of the feed and list the sub-sections it contains.
gameData = game['gameData']
print(gameData.keys())


dict_keys(['game', 'datetime', 'status', 'teams', 'players', 'venue', 'officialVenue', 'weather', 'gameInfo', 'review', 'flags', 'alerts', 'probablePitchers', 'officialScorer', 'primaryDatacaster', 'moundVisits'])


In [None]:
mlb.get(
    'meta',
    params={
        'type':'gameTypes'
    }
)

[{'id': 'S', 'description': 'Spring Training'},
 {'id': 'R', 'description': 'Regular Season'},
 {'id': 'F', 'description': 'Wild Card'},
 {'id': 'D', 'description': 'Division Series'},
 {'id': 'L', 'description': 'League Championship Series'},
 {'id': 'W', 'description': 'World Series'},
 {'id': 'C', 'description': 'Championship'},
 {'id': 'N', 'description': 'Nineteenth Century Series'},
 {'id': 'P', 'description': 'Playoffs'},
 {'id': 'A', 'description': 'All-Star Game'},
 {'id': 'I', 'description': 'Intrasquad'},
 {'id': 'E', 'description': 'Exhibition'}]

In [None]:
# Fetch the gameday type metadata and show it as a code -> description mapping.
# (Only the last expression of a cell is displayed, so the mapping is the sole output.)
gameday_types = mlb.get('meta', params={'type': 'gamedayTypes'})
{entry['code']: entry['description'] for entry in gameday_types}

{'P': 'Premium Gameday - 3D Pitch tracking and Premium product experience',
 'E': 'Enhanced Gameday - 3D Pitch tracking',
 'Y': 'Regular Gameday - Pitch by Pitch',
 'D': 'No Gameday - Pitch by Pitch',
 'N': 'No Gameday - Play by Play level data only',
 'L': 'No Gameday - Linescore level data',
 'B': 'No Gameday - PG / Box Score level data only',
 'S': 'No Gameday - Score / Game Result level data only'}

In [None]:
#|exporti 

def create_enum_with_descriptions(name: str, id_to_description: dict[str, str]) -> Type[Enum]:
    """Dynamically create a str Enum whose members carry a `description` attribute.

    Each key of `id_to_description` becomes a member *value*. The member *name*
    is derived from the description: upper-cased, with spaces, hyphens and any
    other non-word character replaced by underscores.

    Args:
        name: Class name of the Enum to create.
        id_to_description: Mapping of member value -> human-readable description.

    Returns:
        A new `str`-mixin Enum class. `Cls(value)` looks a member up by its id
        and `member.description` returns the mapped description.

    Raises:
        ValueError: If two descriptions reduce to the same member name, because
            one of the ids would otherwise be silently dropped from the Enum.
    """
    def __new__(cls, value):
        # Build the member as a real str so it compares equal to the raw id.
        obj = str.__new__(cls, value)
        obj._value_ = value
        obj.description = id_to_description[value]
        return obj

    # Passed through the functional API alongside the members; the dunder key
    # is installed as the class's member constructor rather than as a member.
    namespace = {"__new__": __new__}

    for key, desc in id_to_description.items():
        # Spaces and hyphens are mapped exactly as before, so names that were
        # already valid identifiers do not change. Remaining characters such as
        # "/" are also mapped to "_" so the member is reachable as an attribute.
        member_name = re.sub(r"\W", "_", desc.upper().replace(" ", "_").replace("-", "_"))
        if member_name in namespace:
            raise ValueError(
                f"Descriptions for {namespace[member_name]!r} and {key!r} both map to "
                f"member name {member_name!r} in enum {name!r}"
            )
        namespace[member_name] = key

    return Enum(name, namespace, type=str)

In [None]:
#|exporti 

# Game type code -> description (same entries as the `meta` gameTypes listing).
game_type_map = {
    'S': 'Spring Training',
    'R': 'Regular Season',
    'F': 'Wild Card',
    'D': 'Division Series',
    'L': 'League Championship Series',
    'W': 'World Series',
    'C': 'Championship',
    'N': 'Nineteenth Century Series',
    'P': 'Playoffs',
    'A': 'All-Star Game',
    'I': 'Intrasquad',
    'E': 'Exhibition',
}

# Gameday type code -> description (same entries as the `meta` gamedayTypes listing).
gameday_type_map = {
    'P': 'Premium Gameday - 3D Pitch tracking and Premium product experience',
    'E': 'Enhanced Gameday - 3D Pitch tracking',
    'Y': 'Regular Gameday - Pitch by Pitch',
    'D': 'No Gameday - Pitch by Pitch',
    'N': 'No Gameday - Play by Play level data only',
    'L': 'No Gameday - Linescore level data',
    'B': 'No Gameday - PG / Box Score level data only',
    'S': 'No Gameday - Score / Game Result level data only',
}

In [None]:
#| exporti 

# Enums built from the code -> description maps; each member exposes `.description`.
GameType = create_enum_with_descriptions(
    name="GameType",
    id_to_description=game_type_map,
)
GameDayType = create_enum_with_descriptions(
    name="GameDayType",
    id_to_description=gameday_type_map,
)

In [None]:
#| export 

class Game(BaseModel):
    """Model for the `game` section of a game feed's `gameData`.

    Field names match the keys of the API payload, so the section can be passed
    directly to `model_validate`.
    """
    pk: int  # numeric game id, e.g. 748266
    type: GameType  # one-letter game type code, e.g. 'S'
    # The sample payload sends 'N' here, which validates to False.
    # NOTE(review): the API may also send 'S' (split doubleheader), which would
    # not parse as a bool — confirm against real doubleheader games.
    doubleHeader: bool
    id: str  # e.g. '2024/02/22/lanmlb-sdnmlb-1'
    gamedayType: GameDayType  # one-letter gameday type code, e.g. 'N'
    tiebreaker: bool  # the sample payload sends 'N', which validates to False
    gameNumber: int
    calendarEventID: str  # e.g. '14-748266-2024-02-22'
    season: str  # sent as a string, e.g. '2024'
    seasonDisplay: str

    

In [None]:
# Sample `game` section taken from the fetched feed, used to exercise the Game model.
game_eg = gameData['game']
game_eg

{'pk': 748266,
 'type': 'S',
 'doubleHeader': 'N',
 'id': '2024/02/22/lanmlb-sdnmlb-1',
 'gamedayType': 'N',
 'tiebreaker': 'N',
 'gameNumber': 1,
 'calendarEventID': '14-748266-2024-02-22',
 'season': '2024',
 'seasonDisplay': '2024'}

In [None]:
# Validate the sample payload; the 'N' flags come back as booleans.
game_validated = Game.model_validate(game_eg)
game_validated

0,1
pk,748266
type,S
doubleHeader,False
id,2024/02/22/lanmlb-sdnmlb-1
gamedayType,N
tiebreaker,False
gameNumber,1
calendarEventID,14-748266-2024-02-22
season,2024
seasonDisplay,2024


In [None]:
game_validated.type.description

'Spring Training'

In [None]:
game_validated.gamedayType.description

'No Gameday - Play by Play level data only'

In [None]:
game.keys()

dict_keys(['copyright', 'gamePk', 'link', 'metaData', 'gameData', 'liveData'])