In [None]:
# default_exp mlb_api

# Parse MLB API

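> Helpers that fetch season schedules and games from the MLB Stats API and flatten the responses into dictionaries ready for DB insert.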

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export

import statsapi as mlb

def get_pks(season):
    """Return every `gamePk` on the schedule of `season`.

    Looks up the season's `seasonStartDate` / `seasonEndDate` (sportId 1),
    requests the schedule for that date range and collects the `gamePk` of
    each game. The list is returned in the reverse of the order in which the
    schedule response lists the games.
    """
    season_info = mlb.get('season', {'sportId': 1, 'seasonId': season})['seasons'][0]
    schedule_params = {
        'startDate': season_info['seasonStartDate'],
        'endDate': season_info['seasonEndDate'],
        'sportId': 1,
    }
    schedule = mlb.get('schedule', schedule_params)

    # flatten dates -> games -> gamePk, then flip the order in place
    pks = [game['gamePk'] for day in schedule['dates'] for game in day['games']]
    pks.reverse()
    return pks

def add_playFKs(dictionary,play,game):
    """Copy the keys that tie a record back to its play into `dictionary`.

    Writes `atBatIndex` and `playEndTime` from `play` and `gamePk` from
    `game`. `dictionary` is modified in place; nothing is returned.
    """
    dictionary['atBatIndex'] = play['atBatIndex']
    dictionary['gamePk'] = game['gamePk']
    dictionary['playEndTime'] = play['playEndTime']
        
def parse(input_dict,nested_lists=None):
    """Flatten nested dictionaries into a single-level dictionary.

    If none of the values of `input_dict` is a dict, `input_dict` itself is
    returned untouched (same object, lists and any other values included).

    Otherwise a new dictionary is built, pass after pass, until no dict
    values remain. In each pass:

    * `str`, `int`, `float` and `bool` values are copied as they are;
    * a dict that has an `'id'` key and no `'pk'` key is reduced to a single
      `"<key>_id"` entry (used as a foreign key);
    * any other dict is expanded into `"<key>_<subkey>"` entries;
    * a list is left out of the result and, when `nested_lists` is given,
      appended to it as `{key: list}`;
    * values of any other type (e.g. `None`) are left out.
    """
    current = input_dict
    while any(type(val) is dict for val in current.values()):
        flattened = {}
        for name, val in current.items():
            kind = type(val)
            if kind in (str, int, float, bool):
                flattened[name] = val
            elif kind is dict:
                # a nested dict carrying an 'id' becomes a foreign key,
                # except for the 'game' table, which also has a 'pk' field
                if 'id' in val and 'pk' not in val:
                    flattened[f"{name}_id"] = val['id']
                else:
                    flattened.update(
                        {f"{name}_{sub}": item for sub, item in val.items()}
                    )
            elif kind is list and nested_lists is not None:
                nested_lists.append({name: val})
        current = flattened
    return current

In [None]:
# Number of gamePks returned by get_pks for each season from 2017 through 2020.
{season: len(get_pks(season)) for season in range(2017, 2021)}

{2017: 3060, 2018: 3024, 2019: 3010, 2020: 1375}

In [None]:
# gamePk list of each season from 2017 through 2020, keyed by year.
game_pks = dict((yr, get_pks(yr)) for yr in range(2017, 2021))

In [None]:
#export 

class Game():
    """Parsed result of a single MLB API `game` call.

    The constructor fetches the game identified by `gamePk` and splits the
    response into dictionaries (or lists of dictionaries) stored as
    attributes: `players`, `game_players`, `team_stats`, `game`, `teams`,
    `team_records`, `venue`, `plays`, `matchups`, `pitches`, `pitchData`,
    `hitData`, `actions`, `movements` and `credits`. The response object
    itself is kept on `_result`.
    """
    
    def __init__(self,gamePk):
        """Call the mlb API and parse results into flat dictionaries for DB insert.

        `gamePk` is passed straight to `mlb.get('game', ...)`. A KeyError is
        raised if the response lacks one of the sections read below.
        """
        # call the api 
        game = mlb.get('game',{'gamePk':gamePk})
        # NOTE: `_result` is the same object as `game`, so every `.pop(...)`
        # below also removes that entry from `_result`.
        self._result = game
        
        # players
        # Popping 'players' also keeps them out of the later
        # `parse(game['gameData'])` call that builds `self.game`.
        self.players = []
        players = game['gameData'].pop('players')
        for playerId in players.keys():
            self.players.append(parse(players[playerId]))
        
        # team / player stats
        #self.teams = []
        self.game_players = []
        self.team_stats = []
        # (team_records is reset to [] again in the teams section below)
        self.team_records = []
        for home_away in ['away','home']:
            team = game['liveData']['boxscore']['teams'][home_away]
            # boxscore players: one record per player per game
            players = team.pop('players')
            for playerId in players.keys():
                player = players[playerId]
                # add foreign keys to player / game records
                player['team_id']=team['team']['id']
                player['gamePk']=game['gamePk']
                self.game_players.append(parse(player))
            # team totals get the same two foreign keys before flattening
            team_stats = team.pop('teamStats')
            team_stats['team_id']=team['team']['id']
            team_stats['gamePk']=game['gamePk']
            self.team_stats.append(parse(team_stats))
            #self.teams.append(parse(team))
        
        # game
        # Flatten what is left of gameData ('players' was popped above).
        gm = parse(game['gameData'])
        #gm.update(game['gameData']['game'])
        setattr(self,'game',gm)
        
        # teams / team records 
        self.teams = []
        self.team_records = []
        teams = game['gameData']['teams']
        for home_away in ['home','away']:
            team = teams[home_away]
            # 'record' is popped so it is stored separately (keyed by
            # teamId and gamePk) and is not part of the parsed team.
            team_record = parse(team.pop('record'))
            team_record['teamId']=team['id']
            team_record['gamePk']=game['gamePk']
            self.team_records.append(team_record)
            self.teams.append(parse(team))
        
        # venue
        self.venue = parse(game['gameData']['venue'])
        
        # plays and play events 
        parsed_plays = []
        # game_play_events / play_events are filled below but never stored
        # on the instance (the setattr for them is commented out).
        game_play_events = []
        matchups = []
        self.pitches = []
        self.pitchData = []
        self.hitData = []
        self.actions = []
        self.movements = []
        self.credits = []
        for play in game['liveData']['plays']['allPlays']:
            # the matchup is removed from the play and handled on its own
            # after this loop; it gets the play's foreign keys first
            matchup = play.pop('matchup')
            add_playFKs(matchup,play,game)
            matchups.append(matchup)
            
            # lists that `parse` strips out of the play are collected here
            # as {key: list} entries
            nested_play_details = []
            play_events = []
            
            parsed_plays.append(parse(play,nested_lists=nested_play_details))
            
            
            # Work-list loop: `parse(x,nested_play_details)` may append more
            # entries to the list being iterated, and those are visited
            # later in this same loop.
            for element in nested_play_details:
                for value in element.values():
                    # only non-empty lists whose first item is a dict are processed
                    if type(value)==list and len(value)>0:
                        if type(value[0])==dict:
                            for x in value:
                                play_event = parse(x,nested_play_details)
                                # add FKs to trace back to the play
                                add_playFKs(play_event,play,game)
                                if play_event.get('isPitch'):
                                    # keys copied into the pitchData / hitData rows;
                                    # KeyError if the event has no 'index'
                                    pitchFKs = ['gamePk','atBatIndex','playEndTime','index']
                                    
                                    # move every 'pitchData*' key out of the event into
                                    # its own row, dropping the 'pitchData_' prefix
                                    pitchData_keys = [x for x in play_event.keys() if x.startswith('pitchData')]
                                    if len(pitchData_keys)>0:
                                        pitchData = {}
                                        for _key in pitchFKs:
                                            pitchData[_key]=play_event[_key]
                                        for _key in pitchData_keys:
                                            pitchData[_key.replace('pitchData_','')]=play_event.pop(_key)
                                        self.pitchData.append(pitchData)
                                    
                                    # same treatment for the 'hitData*' keys
                                    hitData_keys = [x for x in play_event.keys() if x.startswith('hitData')]
                                    if len(hitData_keys)>0:
                                        hitData = {}
                                        for _key in pitchFKs:
                                            hitData[_key]=play_event[_key]
                                        for _key in hitData_keys:
                                            hitData[_key.replace('hitData_','')]=play_event.pop(_key)
                                        self.hitData.append(hitData)
                                                
                                    self.pitches.append(play_event)
                                # The checks below are independent `if`s, so the same
                                # dict object can end up in more than one list.
                                if play_event.get('type')=='action':
                                    self.actions.append(play_event)
                                if 'movement_isOut' in list(play_event.keys()):
                                    self.movements.append(play_event)
                                if 'credit' in list(play_event.keys()):
                                    self.credits.append(play_event)
                                play_events.append(play_event)
                                
            game_play_events.append(play_events)
            
        # every parsed play gets the game's primary key
        for play in parsed_plays:
            play['gamePk']=game['gamePk']
        
        setattr(self,'plays',parsed_plays)
        #setattr(self,'play_events',game_play_events)
        
        # dealing with matchups
        parsed_matchups = []
        # the popped hot/cold zone stats are collected here but not stored
        # on the instance
        game_matchup_stats = []
        for matchup in matchups:
            # pop out hot cold stats
            # these are such a pain to parse I might just calculate them myself
            for stats in ['batterHotColdZoneStats','pitcherHotColdZoneStats']:
                if stats in matchup.keys():
                    game_matchup_stats.append(matchup.pop(stats))
            parsed_matchups.append(parse(matchup))
                            
        setattr(self,'matchups',parsed_matchups)
    
    def __repr__(self):
        """Return the `game_id` entry of the parsed game dictionary."""
        return self.game['game_id']

In [None]:
# Render the documentation entry (signature and docstring) for the Game class.
show_doc(Game)

<h2 id="Game" class="doc_header"><code>class</code> <code>Game</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>Game</code>(**`gamePk`**)

Reference obj for a the result of an MLB API call.

In [None]:
# Take gamePks 1000-1999 of the 2020 list and fetch/parse only the first five.
pks = game_pks[2020][1000:2000]
games = list(map(Game, pks[:5]))

In [None]:
# Per-game player records of the first fetched game.
# (`game` was never defined in this notebook, so the cell failed on a fresh
# kernel; use the first of the `games` built in the previous cell instead.)
games[0].game_players

[{'person_id': 622666,
  'jerseyNumber': '17',
  'position_code': '5',
  'position_name': 'Third Base',
  'position_type': 'Infielder',
  'position_abbreviation': '3B',
  'stats_batting_gamesPlayed': 1,
  'stats_batting_flyOuts': 1,
  'stats_batting_groundOuts': 1,
  'stats_batting_runs': 1,
  'stats_batting_doubles': 0,
  'stats_batting_triples': 0,
  'stats_batting_homeRuns': 0,
  'stats_batting_strikeOuts': 0,
  'stats_batting_baseOnBalls': 0,
  'stats_batting_intentionalWalks': 0,
  'stats_batting_hits': 2,
  'stats_batting_hitByPitch': 0,
  'stats_batting_atBats': 4,
  'stats_batting_caughtStealing': 0,
  'stats_batting_stolenBases': 0,
  'stats_batting_stolenBasePercentage': '.---',
  'stats_batting_groundIntoDoublePlay': 0,
  'stats_batting_groundIntoTriplePlay': 0,
  'stats_batting_plateAppearances': 4,
  'stats_batting_totalBases': 2,
  'stats_batting_rbi': 0,
  'stats_batting_leftOnBase': 0,
  'stats_batting_sacBunts': 0,
  'stats_batting_sacFlies': 0,
  'stats_batting_catche

In [None]:
# Flattened game-level record of each fetched game.
[fetched.game for fetched in games]

[{'game_pk': 631241,
  'game_type': 'R',
  'game_doubleHeader': 'N',
  'game_id': '2020/07/25/atlmlb-nynmlb-1',
  'game_gamedayType': 'P',
  'game_tiebreaker': 'N',
  'game_gameNumber': 1,
  'game_calendarEventID': '14-631241-2020-07-25',
  'game_season': '2020',
  'game_seasonDisplay': '2020',
  'datetime_dateTime': '2020-07-25T20:10:00Z',
  'datetime_originalDate': '2020-07-25',
  'datetime_dayNight': 'day',
  'datetime_time': '4:10',
  'datetime_ampm': 'PM',
  'status_abstractGameState': 'Final',
  'status_codedGameState': 'F',
  'status_detailedState': 'Final',
  'status_statusCode': 'F',
  'status_abstractGameCode': 'F',
  'teams_away_id': 144,
  'teams_home_id': 121,
  'venue_id': 3289,
  'weather_condition': 'Partly Cloudy',
  'weather_temp': '89',
  'weather_wind': '10 mph, Out To CF',
  'gameInfo_attendance': 1,
  'gameInfo_gameDurationMinutes': 201,
  'review_hasChallenges': True,
  'review_away_used': 0,
  'review_away_remaining': 1,
  'review_home_used': 0,
  'review_home_r

In [None]:
#hide
from nbdev.export import notebook2script
notebook2script()

Converted 00_core.ipynb.
Converted index.ipynb.
