In [None]:
# default_exp mlb_api

# Parse MLB API

> Helpers for retrieving game data from the MLB Stats API and flattening it into dictionaries.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#exporti

import statsapi as mlb
from typing import List

In [None]:
#exporti 

def get_season_game_pks(season: int) -> List[int]:
    """
    Returns a list of gamePks for a given season

    Two API calls are made: the 'season' endpoint supplies the season's start
    and end dates, and the 'schedule' endpoint is then queried for that range.

    :param season: int for the year of the season

    :returns list of gamePks as integers, in the order the schedule response
        lists them (date by date, game by game)
    """
    # Keep the API record under its own name so the `season` argument is not
    # overwritten by the lookup result.
    season_info = mlb.get('season', {'sportId': 1, 'seasonId': season})['seasons'][0]
    schedule = mlb.get(
        'schedule',
        {'startDate': season_info['seasonStartDate'],
         'endDate': season_info['seasonEndDate'],
         'sportId': 1}
    )

    # Flatten dates -> games in a single pass. The previous version reversed
    # this list twice, which cancelled out and only cost two extra copies.
    return [game['gamePk']
            for date in schedule['dates']
            for game in date['games']]

In [None]:
show_doc(get_season_game_pks)

<h4 id="get_season_game_pks" class="doc_header"><code>get_season_game_pks</code><a href="__main__.py#L1" class="source_link" style="float:right">[source]</a></h4>

> <code>get_season_game_pks</code>(**`season`**:`int`)

Returns a list of gamePks for a given season

:param season: int for the year of the season 

:returns list of gamePks as integers

In [None]:
# Smoke test: fetch the 2021 gamePks through get_season_game_pks, keep the
# first five, and check that each one is a plain int.
gamePks = get_season_game_pks(2021)[:5]
assert (len(gamePks))==5
for pk in gamePks:
    assert type(pk)==int

In [None]:
# Sample schedule record: the first game listed under the first date returned
# for 2021-04-01.
game_details = mlb.get('schedule',{'sportId':1,'date':'2021-04-01'})['dates'][0]['games'][0]

In [None]:
#exporti 

def add_playFKs(dictionary,play,game):
    """
    Stamp a nested play event with the foreign keys that trace it back to its
    parent play and game.

    Copies 'atBatIndex' and 'playEndTime' from `play` and 'gamePk' from `game`
    into `dictionary`. The dictionary is modified in place; nothing is returned.
    """
    # Assignment order fixes the key order for keys not already present:
    # atBatIndex, gamePk, playEndTime.
    dictionary['atBatIndex'] = play['atBatIndex']
    dictionary['gamePk'] = game['gamePk']
    dictionary['playEndTime'] = play['playEndTime']
        
def parse(input_dict,nested_lists=None):
    """Flatten nested dictionaries into a single-level dictionary.

    The input is flattened one nesting level per pass until no value is a
    dict. On each pass:

    * str / int / float / bool values are copied under their own key;
    * a nested dict that has an 'id' key but no 'pk' key is collapsed to a
      single "<key>_id" entry (a foreign key) and its other fields are dropped;
    * any other nested dict is expanded into "<key>_<subkey>" entries;
    * list values are left out of the result and, when `nested_lists` is
      given, appended to it as ``{key: list}``;
    * values of any other type (e.g. None) are left out.

    A dictionary with no directly nested dict is returned as-is (the same
    object), so lists or other values it still holds are kept and are not
    added to `nested_lists`.
    """
    current = input_dict
    # Each pass removes one level of nesting; stop once no dict values remain.
    while any(type(value) == dict for value in current.values()):
        flattened = {}
        for key, value in current.items():
            kind = type(value)
            if kind in (str, int, float, bool):
                flattened[key] = value
            elif kind == dict:
                # A nested record with its own 'id' is reduced to a foreign
                # key, except for the 'game' record, which also carries 'pk'.
                if 'id' in value and 'pk' not in value:
                    flattened[f"{key}_id"] = value['id']
                else:
                    for sub_key, sub_value in value.items():
                        flattened[f"{key}_{sub_key}"] = sub_value
            elif kind == list and nested_lists is not None:
                nested_lists.append({key: value})
        current = flattened
    return current

In [None]:
show_doc(add_playFKs)

<h4 id="add_playFKs" class="doc_header"><code>add_playFKs</code><a href="__main__.py#L3" class="source_link" style="float:right">[source]</a></h4>

> <code>add_playFKs</code>(**`dictionary`**, **`play`**, **`game`**)

For the nested events within a play, add the foreign keys needed to trace the event 
back to it's game/play

In [None]:
show_doc(parse)

<h4 id="parse" class="doc_header"><code>parse</code><a href="__main__.py#L12" class="source_link" style="float:right">[source]</a></h4>

> <code>parse</code>(**`input_dict`**, **`nested_lists`**=*`None`*)

recursively parse a dictionary with nested lists into a flat dictionary

In [None]:
#export 

class Game():
    """Reference object for the result of an MLB API call.

    Constructing a Game fetches one game by gamePk and breaks the response up
    into flat dictionaries, stored on these attributes:

    * ``game``, ``venue`` -- single dictionaries
    * ``players``, ``game_players``, ``teams``, ``team_stats``,
      ``team_records`` -- lists of dictionaries
    * ``plays``, ``matchups`` -- one dictionary per play
    * ``pitches``, ``pitchData``, ``hitData``, ``actions``, ``movements``,
      ``credits`` -- dictionaries built from lists nested inside each play
    * ``_result`` -- the API response object; it is modified in place during
      parsing (several sub-dictionaries are popped out of it)
    """
    
    def __init__(self,gamePk):
        """Call the mlb API and parse results into flat dictionaries for DB insert.

        :param gamePk: identifier of the game, passed to the 'game' endpoint
        """
        # call the api
        game = mlb.get('game',{'gamePk':gamePk})
        # Same object as `game`: every pop() below also changes self._result.
        self._result = game
        
        # players: removed from gameData so they are not flattened into self.game
        self.players = []
        players = game['gameData'].pop('players')
        for playerId in players.keys():
            self.players.append(parse(players[playerId]))
        
        # team / player stats, taken from the boxscore (away first, then home)
        #self.teams = []
        self.game_players = []
        self.team_stats = []
        # Nothing is appended to team_records in this loop; it is re-created
        # and filled in the "teams / team records" section further down.
        self.team_records = []
        for home_away in ['away','home']:
            team = game['liveData']['boxscore']['teams'][home_away]
            players = team.pop('players')
            for playerId in players.keys():
                player = players[playerId]
                # add foreign keys to player / game records
                player['team_id']=team['team']['id']
                player['gamePk']=game['gamePk']
                self.game_players.append(parse(player))
            team_stats = team.pop('teamStats')
            team_stats['team_id']=team['team']['id']
            team_stats['gamePk']=game['gamePk']
            self.team_stats.append(parse(team_stats))
            #self.teams.append(parse(team))
        
        # game: flatten what is left of gameData (players were popped above;
        # the per-team 'record' entries are still present at this point)
        gm = parse(game['gameData'])
        #gm.update(game['gameData']['game'])
        setattr(self,'game',gm)
        
        # teams / team records (home first, then away)
        self.teams = []
        self.team_records = []
        teams = game['gameData']['teams']
        for home_away in ['home','away']:
            team = teams[home_away]
            # 'record' is popped so it is stored separately from the team itself
            team_record = parse(team.pop('record'))
            # NOTE(review): the key here is 'teamId', while the boxscore
            # records above use 'team_id' -- confirm this is intended.
            team_record['teamId']=team['id']
            team_record['gamePk']=game['gamePk']
            self.team_records.append(team_record)
            self.teams.append(parse(team))
        
        # venue
        self.venue = parse(game['gameData']['venue'])
        
        # plays and play events
        parsed_plays = []
        # Filled below but not stored on the instance (the setattr for
        # 'play_events' is commented out).
        game_play_events = []
        matchups = []
        self.pitches = []
        self.pitchData = []
        self.hitData = []
        self.actions = []
        self.movements = []
        self.credits = []
        for play in game['liveData']['plays']['allPlays']:
            # The matchup is pulled out of the play and parsed separately below.
            matchup = play.pop('matchup')
            add_playFKs(matchup,play,game)
            matchups.append(matchup)
            
            # Worklist of {key: list} entries that parse() sets aside for this play.
            nested_play_details = []
            play_events = []
            
            parsed_plays.append(parse(play,nested_lists=nested_play_details))
            
            
            # The list being iterated is also passed to parse() below, so any
            # lists it sets aside while flattening an event are appended here
            # and visited later by this same loop.
            for element in nested_play_details:
                for value in element.values():
                    # Only non-empty lists whose first item is a dict are
                    # treated as events; other lists are skipped.
                    if type(value)==list and len(value)>0:
                        if type(value[0])==dict:
                            for x in value:
                                play_event = parse(x,nested_play_details)
                                # add FKs to trace back to the play
                                add_playFKs(play_event,play,game)
                                if play_event.get('isPitch'):
                                    # Keys copied into the pitchData / hitData
                                    # rows so they can be tied to this pitch.
                                    pitchFKs = ['gamePk','atBatIndex','playEndTime','index']
                                    
                                    # Move every 'pitchData*' field out of the
                                    # event into its own row, dropping the
                                    # 'pitchData_' prefix from the key.
                                    pitchData_keys = [x for x in play_event.keys() if x.startswith('pitchData')]
                                    if len(pitchData_keys)>0:
                                        pitchData = {}
                                        for _key in pitchFKs:
                                            pitchData[_key]=play_event[_key]
                                        for _key in pitchData_keys:
                                            pitchData[_key.replace('pitchData_','')]=play_event.pop(_key)
                                        self.pitchData.append(pitchData)
                                    
                                    # Same treatment for the 'hitData*' fields.
                                    hitData_keys = [x for x in play_event.keys() if x.startswith('hitData')]
                                    if len(hitData_keys)>0:
                                        hitData = {}
                                        for _key in pitchFKs:
                                            hitData[_key]=play_event[_key]
                                        for _key in hitData_keys:
                                            hitData[_key.replace('hitData_','')]=play_event.pop(_key)
                                        self.hitData.append(hitData)
                                                
                                    self.pitches.append(play_event)
                                # These checks are independent, so one event
                                # dictionary can land in more than one list.
                                if play_event.get('type')=='action':
                                    self.actions.append(play_event)
                                if 'movement_isOut' in list(play_event.keys()):
                                    self.movements.append(play_event)
                                if 'credit' in list(play_event.keys()):
                                    self.credits.append(play_event)
                                play_events.append(play_event)
                                
            game_play_events.append(play_events)
            
        # Tag every parsed play with the game it belongs to.
        for play in parsed_plays:
            play['gamePk']=game['gamePk']
        
        setattr(self,'plays',parsed_plays)
        #setattr(self,'play_events',game_play_events)
        
        # dealing with matchups
        parsed_matchups = []
        # Receives the popped hot/cold stats; not stored on the instance.
        game_matchup_stats = []
        for matchup in matchups:
            # pop out hot cold stats
            # these are such a pain to parse I might just calculate them myself
            for stats in ['batterHotColdZoneStats','pitcherHotColdZoneStats']:
                if stats in matchup.keys():
                    game_matchup_stats.append(matchup.pop(stats))
            parsed_matchups.append(parse(matchup))
                            
        setattr(self,'matchups',parsed_matchups)
    
    def __repr__(self):
        """Return the 'game_id' entry of the parsed game dictionary."""
        return self.game['game_id']

In [None]:
show_doc(Game)

<h2 id="Game" class="doc_header"><code>class</code> <code>Game</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>Game</code>(**`gamePk`**)

Reference obj for a the result of an MLB API call.

In [None]:
game = Game(gamePks[0])

In [None]:
game.game

{'game_pk': 642138,
 'game_type': 'S',
 'game_doubleHeader': 'N',
 'game_id': '2021/02/28/pitmlb-balmlb-1',
 'game_gamedayType': 'N',
 'game_tiebreaker': 'N',
 'game_gameNumber': 1,
 'game_calendarEventID': '14-642138-2021-02-28',
 'game_season': '2021',
 'game_seasonDisplay': '2021',
 'datetime_dateTime': '2021-02-28T18:05:00Z',
 'datetime_originalDate': '2021-02-28',
 'datetime_dayNight': 'day',
 'datetime_time': '1:05',
 'datetime_ampm': 'PM',
 'status_abstractGameState': 'Final',
 'status_codedGameState': 'F',
 'status_detailedState': 'Final',
 'status_statusCode': 'F',
 'status_startTimeTBD': False,
 'status_abstractGameCode': 'F',
 'teams_away_id': 134,
 'teams_home_id': 110,
 'venue_id': 2508,
 'weather_condition': 'Partly Cloudy',
 'weather_temp': '83',
 'weather_wind': '12 mph, In From CF',
 'gameInfo_attendance': 1705,
 'gameInfo_gameDurationMinutes': 171,
 'review_hasChallenges': True,
 'review_away_used': 0,
 'review_away_remaining': 1,
 'review_home_used': 0,
 'review_home

In [None]:
game.movements[:2]

[{'movement_end': '1B',
  'movement_isOut': False,
  'details_event': 'Single',
  'details_eventType': 'single',
  'details_runner_id': 621028,
  'details_isScoringEvent': False,
  'details_rbi': False,
  'details_earned': False,
  'details_teamUnearned': False,
  'details_playIndex': 3,
  'atBatIndex': 0,
  'gamePk': 642138,
  'playEndTime': '2021-02-28T18:05:28.732Z'},
 {'movement_originBase': '1B',
  'movement_start': '1B',
  'movement_end': '2B',
  'movement_isOut': False,
  'details_event': 'Walk',
  'details_eventType': 'walk',
  'details_movementReason': 'r_adv_force',
  'details_runner_id': 621028,
  'details_isScoringEvent': False,
  'details_rbi': False,
  'details_earned': False,
  'details_teamUnearned': False,
  'details_playIndex': 3,
  'atBatIndex': 1,
  'gamePk': 642138,
  'playEndTime': '2021-02-28T18:08:10.770Z'}]

In [None]:
game.game_players[0]

{'person_id': 624428,
 'jerseyNumber': '26',
 'position_code': '4',
 'position_name': 'Second Base',
 'position_type': 'Infielder',
 'position_abbreviation': '2B',
 'status_code': 'A',
 'status_description': 'Active',
 'parentTeamId': 134,
 'seasonStats_batting_gamesPlayed': 0,
 'seasonStats_batting_flyOuts': 0,
 'seasonStats_batting_groundOuts': 0,
 'seasonStats_batting_runs': 0,
 'seasonStats_batting_doubles': 0,
 'seasonStats_batting_triples': 0,
 'seasonStats_batting_homeRuns': 0,
 'seasonStats_batting_strikeOuts': 0,
 'seasonStats_batting_baseOnBalls': 0,
 'seasonStats_batting_intentionalWalks': 0,
 'seasonStats_batting_hits': 0,
 'seasonStats_batting_hitByPitch': 0,
 'seasonStats_batting_avg': '.000',
 'seasonStats_batting_atBats': 0,
 'seasonStats_batting_obp': '.000',
 'seasonStats_batting_slg': '.000',
 'seasonStats_batting_ops': '.000',
 'seasonStats_batting_caughtStealing': 0,
 'seasonStats_batting_stolenBases': 0,
 'seasonStats_batting_stolenBasePercentage': '.---',
 'seaso

In [None]:
game.teams

[{'id': 110,
  'name': 'Baltimore Orioles',
  'link': '/api/v1/teams/110',
  'season': 2021,
  'venue_id': 2,
  'springVenue_id': 2508,
  'teamCode': 'bal',
  'fileCode': 'bal',
  'abbreviation': 'BAL',
  'teamName': 'Orioles',
  'locationName': 'Baltimore',
  'firstYearOfPlay': '1901',
  'league_id': 103,
  'division_id': 201,
  'sport_id': 1,
  'shortName': 'Baltimore',
  'springLeague_id': 115,
  'allStarStatus': 'N',
  'active': True},
 {'id': 134,
  'name': 'Pittsburgh Pirates',
  'link': '/api/v1/teams/134',
  'season': 2021,
  'venue_id': 31,
  'springVenue_id': 2526,
  'teamCode': 'pit',
  'fileCode': 'pit',
  'abbreviation': 'PIT',
  'teamName': 'Pirates',
  'locationName': 'Pittsburgh',
  'firstYearOfPlay': '1882',
  'league_id': 104,
  'division_id': 205,
  'sport_id': 1,
  'shortName': 'Pittsburgh',
  'springLeague_id': 115,
  'allStarStatus': 'N',
  'active': True}]