In [1]:
import time
import requests
import json
import re

In [2]:
base_url = "http://statsapi.mlb.com/api/"
alt_base_url = "https://beta-statsapi.mlb.com:443/api/"

In [3]:
def get_json(url):
    ## get_json: str -> json
    ## get_json() function takes 'url' and returns the response in json format from the MLB Stats API.
    
    response = requests.get(url)
    if (response.status_code != 200):
        print('status code: %s' % response.status_code)
        time.sleep(1.0)
        get_json(url)
    else:
        content = response.content
        json_content = json.loads(content)
        return(json_content)

#### Functions to pull data from API

In [4]:
from datetime import datetime 
import pytz

In [5]:
eastern = pytz.timezone('US/Eastern')
fmt = '%Y-%m-%d'
today = datetime.today().astimezone().strftime(fmt)

### Attendance

In [6]:
def get_attendance(Id,
                teamOrleague = "team",
                date = None,
                startDate = None,
                endDate = None,
                season = None,
                field = "Ytd"):

    url = alt_base_url + 'v1/attendance'
    attedance_field = "attendanceAverage" + field 
    
    attendance_dict = {}
       
    if not any([date, startDate, endDate, season]):
        date = today
    
    if date is not None:
        suffix = "?date={}".format(date)
        
    if (startDate is not None and endDate is None) or (startDate is None and endDate is not None):
        ValueError("Specify both Start Date and End Date in MM/DD/YYYY format.")
        print
        
    if season is not None:
        if type(season) is list:
            suffix = "&season={}".format(','.join(str(s) for s in season))
            
        else:
            suffix = "&season={}".format(season)
            
    
    
    if teamOrleague == "team":
        
        if type(Id) is list:
            ID_str = ','.join(str(i) for i in Id)

        else:
            ID_str = Id

        if teamOrleague == "team":
            suffix = suffix + "&teamId={teamId}".format(teamId = ID_str)
                
        attendance_content = get_json(url + suffix)

        records = attendance_content['records']

        for record in records:
            team = record['team']['id']
            attendance = record[attedance_field]

            if date is not None:
                attendance_dict.update({team: {date: attendance}})
            
            if season is not None:
                if type(season) is list:
                    for s in season:
                        s = record['year']
                        attendance_dict.update({team: {s: attendance}})
                
                else:
                    attendance_dict.update({team: {season: attendance}})
                    
    
    
    if teamOrleague == "league":
        
        if type(Id) is not list:
            Ids = list(Id)
            
        else: 
            Ids = Id
        
        for i in Ids:
            suffix = suffix + "&leagueId={leagueId}".format(leagueId = i)

            attendance_content = get_json(url + suffix)

            records = attendance_content['records']

            for record in records:
                league = i
                attendance = record[attedance_field]

                if date is not None:
                    attendance_dict.update({league: {date: attendance}})
                    
                if season is not None:
                    if type(season) is list:
                        for s in season:
                            s = record['year']
                            attendance_dict.update({league: {s: attendance}})

                    else:
                        attendance_dict.update({league: {season: attendance}})
                        
                        
    return(attendance_dict)

In [7]:
get_attendance(141, date = "2021-04-01")

{141: {'2021-04-01': 5425}}

In [8]:
get_attendance([103,104],"league",date = "2021-04-01")

{103: {'2021-04-01': 9155}, 104: {'2021-04-01': 10285}}

### Divisions

In [9]:
def get_division():
    
    division_url = "http://statsapi.mlb.com/api/v1/divisions?sportId=1"
    division_content = get_json(division_url)
    divisions = division_content['divisions']
    
    division_filter = ['id', 'name', 'nameShort', 'abbreviation']
    
    division_list = [{k:v for k,v in d.items() if k in division_filter} for d in divisions]
    
    return(division_list)
    

In [10]:
get_division()

[{'id': 200,
  'name': 'American League West',
  'nameShort': 'AL West',
  'abbreviation': 'ALW'},
 {'id': 201,
  'name': 'American League East',
  'nameShort': 'AL East',
  'abbreviation': 'ALE'},
 {'id': 202,
  'name': 'American League Central',
  'nameShort': 'AL Central',
  'abbreviation': 'ALC'},
 {'id': 203,
  'name': 'National League West',
  'nameShort': 'NL West',
  'abbreviation': 'NLW'},
 {'id': 204,
  'name': 'National League East',
  'nameShort': 'NL East',
  'abbreviation': 'NLE'},
 {'id': 205,
  'name': 'National League Central',
  'nameShort': 'NL Central',
  'abbreviation': 'NLC'}]

In [11]:
def lookup_division(divisionId, field = "nameShort"):
    
    division_list = get_division()
    division_wanted = [d[field] for d in division_list if divisionId in d.values()][0]
    
    return(division_wanted)
    

In [12]:
lookup_division(200)

'AL West'

### Linescore

In [13]:
def get_linescore(gamePk):
    
    team_url = base_url + "v1/schedule?gamePk={gamePk}".format(gamePk = gamePk)
    team_content = get_json(team_url)
    
    teams = team_content['dates'][0]['games'][0]['teams']
    
    team_dict = {"teams": {k: {x: y for x,y in v['team'].items() if x == 'id' or x == 'name'} for k, v in teams.items()}}
    
    
    ls_url = base_url + "/v1/game/{gamePk}/linescore".format(gamePk = gamePk)
    linescore_content = get_json(ls_url)
    
    linescore_dict = {}
    
    innings = linescore_content['innings']
    
    for inning in innings:
        inn_count = inning['ordinalNum']
        home = {x: y for x, y in inning['home'].items() if x != 'leftOnBase'} 
        away = {x: y for x, y in inning['away'].items() if x != 'leftOnBase'} 
        
        linescore_dict.update({inn_count: {"home": home, "away": away}})
    
    linescore_dict.update(team_dict)
    
    return(linescore_dict)

In [14]:
get_linescore(634645)

{'1st': {'home': {'runs': 2, 'hits': 3, 'errors': 0},
  'away': {'runs': 0, 'hits': 1, 'errors': 0}},
 '2nd': {'home': {'runs': 1, 'hits': 1, 'errors': 0},
  'away': {'runs': 0, 'hits': 0, 'errors': 1}},
 '3rd': {'home': {'runs': 0, 'hits': 0, 'errors': 0},
  'away': {'runs': 0, 'hits': 0, 'errors': 0}},
 '4th': {'home': {'runs': 0, 'hits': 0, 'errors': 0},
  'away': {'runs': 0, 'hits': 1, 'errors': 0}},
 '5th': {'home': {'runs': 0, 'hits': 1, 'errors': 0},
  'away': {'runs': 0, 'hits': 0, 'errors': 0}},
 '6th': {'home': {'runs': 0, 'hits': 0, 'errors': 0},
  'away': {'runs': 0, 'hits': 1, 'errors': 0}},
 '7th': {'home': {'runs': 0, 'hits': 0, 'errors': 0},
  'away': {'runs': 0, 'hits': 0, 'errors': 0}},
 '8th': {'home': {'runs': 0, 'hits': 0, 'errors': 0},
  'away': {'runs': 0, 'hits': 1, 'errors': 0}},
 '9th': {'home': {'hits': 0, 'errors': 0},
  'away': {'runs': 2, 'hits': 2, 'errors': 0}},
 'teams': {'away': {'id': 114, 'name': 'Cleveland Indians'},
  'home': {'id': 116, 'name': 'D

### Pitch f/x

In [15]:
def get_pitchfx(gamePk):
    
    bs_url = base_url + "/v1/game/{gamePk}/boxscore".format(gamePk = gamePk)
    bs_content = get_json(bs_url)
    bs_info = bs_content['info']

    umpires = [l for l in bs_info if l['label'] == "Umpires"][0]['value']
    home_umpire = re.findall("HP: [\w \.]+ 1B:", umpires)[0].lstrip("HP: ").rstrip(". 1B:")
    

    pbp_url = base_url + "/v1/game/{gamePk}/playByPlay".format(gamePk = gamePk)
    pbp_content = get_json(pbp_url)
    
    allPlays = pbp_content['allPlays']
       
    atBat_dict = {"home_umpire": home_umpire}
    
    for play in allPlays:
                
        about = play['about']
        
        atBat_count = about['atBatIndex']        
        inning = about['inning']
        home = "home" if about['isTopInning'] else "away"
        
        matchup = play['matchup']
        pitcher = matchup['pitcher']
        batter = matchup['batter']
        pitchHand = matchup['pitchHand']['code']
        batSide = matchup['batSide']['code']      
        
        """
        matchup_dict = {'pitcher': pitcher,
                       'batter': batter,
                       'pitchHand': pitchHand,
                       'batSide': batSide}
        """
        atBat_dict.update({atBat_count: {"inning": inning, 
                                         "home": home, 
                                         'pitcher': pitcher,
                                         'batter': batter,
                                         'pitchHand': pitchHand,
                                         'batSide': batSide}})
        
        events = play['playEvents']     
        pitched_events = (event for event in events if event['isPitch'])
        
        pitch_list = []
        
        for event in pitched_events:
            
            if not event['isPitch']:
                continue
            
            """
            if event['details']['isInPlay']:
                continue
            
            strike = event['details']['isStrike']
            
            pitch_index = event['index']
            """
            
            strike = "strike" if not event['details']['isBall'] else "ball"
            
            pitch_data = event['pitchData']
            
            sz_top = pitch_data['strikeZoneTop']
            sz_bottom = pitch_data['strikeZoneBottom']
            x = pitch_data['coordinates']['pX']
            z = pitch_data['coordinates']['pZ']
            
            #pitch_dict = {pitch_index: {'call': strike, 'sz_top': sz_top, 'sz_bottom': sz_bottom, 'x': x, 'z': z}}
            pitch_list.append({'call': strike, 'sz_top': sz_top, 'sz_bottom': sz_bottom, 'x': x, 'z': z})
            
        atBat_dict[atBat_count]['pitchData'] = pitch_list
            
    return(atBat_dict)

In [16]:
get_pitchfx(634583)

{'home_umpire': 'Ed Hickox',
 0: {'inning': 1,
  'home': 'home',
  'pitcher': {'id': 502043,
   'fullName': 'Kyle Gibson',
   'link': '/api/v1/people/502043'},
  'batter': {'id': 543760,
   'fullName': 'Marcus Semien',
   'link': '/api/v1/people/543760'},
  'pitchHand': 'R',
  'batSide': 'R',
  'pitchData': [{'call': 'strike',
    'sz_top': 3.37,
    'sz_bottom': 1.53,
    'x': 0.02,
    'z': 3.3}]},
 1: {'inning': 1,
  'home': 'home',
  'pitcher': {'id': 502043,
   'fullName': 'Kyle Gibson',
   'link': '/api/v1/people/502043'},
  'batter': {'id': 624415,
   'fullName': 'Cavan Biggio',
   'link': '/api/v1/people/624415'},
  'pitchHand': 'R',
  'batSide': 'L',
  'pitchData': [{'call': 'strike',
    'sz_top': 3.51,
    'sz_bottom': 1.61,
    'x': -0.15,
    'z': 2.45},
   {'call': 'ball', 'sz_top': 3.42, 'sz_bottom': 1.59, 'x': -0.74, 'z': 4.41},
   {'call': 'strike',
    'sz_top': 3.47,
    'sz_bottom': 1.59,
    'x': -0.17,
    'z': 2.23},
   {'call': 'ball', 'sz_top': 3.48, 'sz_bottom

### Persons

#### Basic Info

In [17]:
import re

def lookup_player(name):
    """
    returns personId for given player name
    """
    sports_url = base_url + "/v1/sports/1/players"
    sports_content = get_json(sports_url)
    
    players = sports_content['people']
    
    matching_player = [player for player in players if str(name).lower() in str(player.values()).lower()]
    
    if len(matching_player) == 1:
        return(matching_player[0]['id'])
    
    return(matching_player)    

In [18]:
lookup_player("ryu")

547943

In [19]:
def lookup_player_info(playerId):
    
    person_url = base_url + "v1/people/{}/".format(playerId)
    person_content = get_json(person_url)
    profile = person_content['people'][0]
    
    return(profile)

In [20]:
lookup_player_info(547943)

{'id': 547943,
 'fullName': 'Hyun Jin Ryu',
 'link': '/api/v1/people/547943',
 'firstName': 'Hyun Jin',
 'lastName': 'Ryu',
 'primaryNumber': '99',
 'birthDate': '1987-03-25',
 'currentAge': 34,
 'birthCity': 'Incheon',
 'birthCountry': 'South Korea',
 'height': '6\' 3"',
 'weight': 255,
 'active': True,
 'primaryPosition': {'code': '1',
  'name': 'Pitcher',
  'type': 'Pitcher',
  'abbreviation': 'P'},
 'useName': 'Hyun Jin',
 'boxscoreName': 'Ryu',
 'nickName': 'Monster',
 'gender': 'M',
 'isPlayer': True,
 'isVerified': True,
 'pronunciation': 'he-yun jin ree-yoo',
 'mlbDebutDate': '2013-04-02',
 'batSide': {'code': 'R', 'description': 'Right'},
 'pitchHand': {'code': 'L', 'description': 'Left'},
 'nameFirstLast': 'Hyun Jin Ryu',
 'nameSlug': 'hyun-jin-ryu-547943',
 'firstLastName': 'Hyun Jin Ryu',
 'lastFirstName': 'Ryu, Hyun Jin',
 'lastInitName': 'Ryu, H',
 'initLastName': 'H Ryu',
 'fullFMLName': 'Hyun Jin Ryu',
 'fullLFMName': 'Ryu, Hyun Jin',
 'strikeZoneTop': 3.49,
 'strikeZon

#### Game-specific Player Stat

In [21]:
def player_game_stat(personId, gamePk):
    
    game_stat_url = base_url + "v1/people/{playerId}/stats/game/{gamePk}".format(playerId = personId, gamePk = gamePk)
    game_stat_content = get_json(game_stat_url)
    game_stat = game_stat_content['stats']
    
    stats = [s for s in game_stat if s['splits']]
    
    stat_dict = {}
    
    try:    
        for s in stats:        
            t = s["group"]["displayName"]
            splits = s["splits"]
            stat = splits[0]["stat"]
            stat_dict.update({t:stat})
        return(stat_dict)

    except:
        print("The player did not play in the stated game.")

In [22]:
player_game_stat(547943, 634645)

{'pitching': {'groundOuts': 1,
  'airOuts': 1,
  'runs': 2,
  'doubles': 0,
  'triples': 1,
  'homeRuns': 0,
  'strikeOuts': 2,
  'baseOnBalls': 0,
  'intentionalWalks': 0,
  'hits': 2,
  'hitByPitch': 0,
  'avg': '.333',
  'atBats': 6,
  'obp': '.333',
  'slg': '.667',
  'ops': '1.000',
  'caughtStealing': 0,
  'stolenBases': 0,
  'stolenBasePercentage': '.---',
  'groundIntoDoublePlay': 0,
  'numberOfPitches': 25,
  'totalBases': 4,
  'groundOutsToAirouts': '1.00',
  'rbi': 0,
  'strikeoutWalkRatio': '-.--',
  'sacBunts': 0,
  'sacFlies': 0}}

### Schedule

In [23]:
su = base_url + "/v1/schedule?sportId=1"
s = get_json(su)
s    

{'copyright': 'Copyright 2021 MLB Advanced Media, L.P.  Use of any content on this page acknowledges agreement to the terms posted here http://gdx.mlb.com/components/copyright.txt',
 'totalItems': 15,
 'totalEvents': 0,
 'totalGames': 15,
 'totalGamesInProgress': 0,
 'dates': [{'date': '2021-06-15',
   'totalItems': 15,
   'totalEvents': 0,
   'totalGames': 15,
   'totalGamesInProgress': 0,
   'games': [{'gamePk': 633684,
     'link': '/api/v1.1/game/633684/feed/live',
     'gameType': 'R',
     'season': '2021',
     'gameDate': '2021-06-15T23:05:00Z',
     'officialDate': '2021-06-15',
     'status': {'abstractGameState': 'Preview',
      'codedGameState': 'P',
      'detailedState': 'Pre-Game',
      'statusCode': 'P',
      'startTimeTBD': False,
      'abstractGameCode': 'P'},
     'teams': {'away': {'leagueRecord': {'wins': 23,
        'losses': 42,
        'pct': '.354'},
       'score': 0,
       'team': {'id': 134,
        'name': 'Pittsburgh Pirates',
        'link': '/api/v1

In [24]:
def get_schedule(date = None,
                 startDate = None,
                 endDate = None,
                 teamId = None,
                 season = None,
                 sportId = 1):
    
    schedule_base_url = base_url + "/v1/schedule?sportId=1"
    
    """
    if not any([date, startDate, endDate]):
        date = today
    """
    suffix = ""
    
    if date is not None:
        suffix += "&date={}".format(date)
        
    if (startDate is not None and endDate is None) or (startDate is None and endDate is not None):
        ValueError("Specify both Start Date and End Date in MM/DD/YYYY format.")
        
    if startDate is not None and endDate is not None:
        suffix += "&startDate={startDate}&endDate={endDate}".format(startDate = startDate, endDate = endDate)
        
    if teamId is not None:
        suffix += "&teamId={}".format(teamId)
        
    schedule_url = schedule_base_url + suffix
    print(schedule_url)
    schedule_content = get_json(schedule_url)
    
    schedule_dict = {}
    
    dates = schedule_content['dates']
    
    for d in dates:
        day = d['date']
        games = d['games']
        
        games_list = []
        
        for game in games:
            gameID = game['gamePk']
            
            teams = game['teams']
            away = teams['away']
            away_name = away['team']['name']
            away_Id = away['team']['id']
            
            home = teams['home']
            home_name = home['team']['name']
            home_Id = home['team']['id']
            
            games_list.append({'gamePk': gameID, 
                               'home': {'ID': home_Id, 'name': home_name},
                               'away':{'ID': away_Id, 'name': away_name}})
            
        schedule_dict.update({day:games_list})
        
    return(schedule_dict)

In [25]:
get_schedule(teamId=141)

http://statsapi.mlb.com/api//v1/schedule?sportId=1&teamId=141


{'2021-06-15': [{'gamePk': 633653,
   'home': {'ID': 141, 'name': 'Toronto Blue Jays'},
   'away': {'ID': 147, 'name': 'New York Yankees'}}]}

### Add postseanson query

### Season

In [26]:
def get_season(season):
    """
    returns season start and end dates
    """
    season_url = base_url + "v1/seasons?season={}&sportId=1".format(season)
    season_content = get_json(season_url)
    
    return(season_content['seasons'][0])


In [27]:
get_season(2021)

{'seasonId': '2021',
 'hasWildcard': True,
 'regularSeasonStartDate': '2021-04-01',
 'regularSeasonEndDate': '2021-10-03',
 'preSeasonStartDate': '2021-02-28',
 'preSeasonEndDate': '2021-03-30',
 'postSeasonStartDate': '2021-10-04',
 'postSeasonEndDate': '2021-10-31',
 'lastDate1stHalf': '2021-07-11',
 'firstDate2ndHalf': '2021-07-15',
 'allStarDate': '2021-07-13',
 'seasonStartDate': '2021-02-28',
 'seasonEndDate': '2021-10-31'}

### Standings

In [28]:
def get_standings(leagueID = None, season = 2021):
    """
    leagueId is the mandatory parameter in this API query.
    """
    suffix = ""
    base_standings_url = base_url + "v1/standings"
    
    if leagueID is None:
        suffix += "?leagueId=103,104"
        
    else:
        suffix += "?leaugeId={}".format(leagueId)
    
    suffix += "&season={}".format(season)
    
    standings_url = base_standings_url + suffix
    records = get_json(standings_url)['records']
    
    division_dict = {}
    
    for record in records:
        
        division = lookup_division(record['division']['id'])
        
        team_records = record['teamRecords']
        
        tr_dict = {}
        
        for tr in team_records:

            team_name = tr['team']['name']
            rank = tr['divisionRank']
            gamesPlayed = tr['gamesPlayed']
            gamesBack = tr['gamesBack']
            wins = tr['leagueRecord']['wins']
            losses = tr['leagueRecord']['losses']
            pct = tr['leagueRecord']['pct']

            tr_dict.update({team_name:{"rank":rank, 'Played': gamesPlayed, "W": wins, "L": losses, "%": pct, "gamesBack": gamesBack}})
            
        division_dict.update({division:tr_dict})
        
    return(division_dict)

In [29]:
get_standings()

{'AL West': {'Oakland Athletics': {'rank': '1',
   'Played': 68,
   'W': 41,
   'L': 27,
   '%': '.603',
   'gamesBack': '-'},
  'Houston Astros': {'rank': '2',
   'Played': 65,
   'W': 37,
   'L': 28,
   '%': '.569',
   'gamesBack': '2.5'},
  'Los Angeles Angels': {'rank': '3',
   'Played': 66,
   'W': 33,
   'L': 33,
   '%': '.500',
   'gamesBack': '7.0'},
  'Seattle Mariners': {'rank': '4',
   'Played': 68,
   'W': 33,
   'L': 35,
   '%': '.485',
   'gamesBack': '8.0'},
  'Texas Rangers': {'rank': '5',
   'Played': 66,
   'W': 25,
   'L': 41,
   '%': '.379',
   'gamesBack': '15.0'}},
 'AL East': {'Tampa Bay Rays': {'rank': '1',
   'Played': 67,
   'W': 43,
   'L': 24,
   '%': '.642',
   'gamesBack': '-'},
  'Boston Red Sox': {'rank': '2',
   'Played': 67,
   'W': 40,
   'L': 27,
   '%': '.597',
   'gamesBack': '3.0'},
  'Toronto Blue Jays': {'rank': '3',
   'Played': 64,
   'W': 33,
   'L': 31,
   '%': '.516',
   'gamesBack': '8.5'},
  'New York Yankees': {'rank': '4',
   'Played': 

### Team

Team ID

In [30]:
def lookup_team(name):
    """
    returns personId for given player name
    """
    team_url = base_url + "/v1/teams?sportId=1"
    team_content = get_json(team_url)
    teams = team_content['teams']
    
    matching_team = [team for team in teams if str(name).lower() in str(team.values()).lower()]
    
    if len(matching_team) == 1:
        return(matching_team[0]['id'])
    
    return(matching_team)

#### Roster

In [31]:
def lookup_roster(teamId, season = 2021, rosterType = "40man"):
    """
    rosterType can be either one of "40man", "fullSeason", "full", or "active"
    """
    
    roster_url = base_url + "v1/teams/{teamId}/roster/{rosterType}?season={season}".format(teamId = teamId,
                                                                                          rosterType = rosterType,
                                                                                          season = season)
    
    roster = get_json(roster_url)['roster']
    
    return(roster)

In [32]:
lookup_roster(141)

[{'person': {'id': 666201,
   'fullName': 'Alek Manoah',
   'link': '/api/v1/people/666201'},
  'jerseyNumber': '6',
  'position': {'code': '1',
   'name': 'Pitcher',
   'type': 'Pitcher',
   'abbreviation': 'P'},
  'status': {'code': 'A', 'description': 'Active'},
  'parentTeamId': 141},
 {'person': {'id': 621593,
   'fullName': 'Anthony Castro',
   'link': '/api/v1/people/621593'},
  'jerseyNumber': '63',
  'position': {'code': '1',
   'name': 'Pitcher',
   'type': 'Pitcher',
   'abbreviation': 'P'},
  'status': {'code': 'A', 'description': 'Active'},
  'parentTeamId': 141},
 {'person': {'id': 666182,
   'fullName': 'Bo Bichette',
   'link': '/api/v1/people/666182'},
  'jerseyNumber': '11',
  'position': {'code': '6',
   'name': 'Shortstop',
   'type': 'Infielder',
   'abbreviation': 'SS'},
  'status': {'code': 'A', 'description': 'Active'},
  'parentTeamId': 141},
 {'person': {'id': 605218,
   'fullName': 'Carl Edwards Jr.',
   'link': '/api/v1/people/605218'},
  'jerseyNumber': '43