In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsapi
import json
from tqdm import tqdm
import re
import datetime

%matplotlib inline

In [6]:
# Query parameters for the 'teams' endpoint; 'fields' restricts which keys come back.
team_params = {
    'activeStatus': 'Y',
    'season': 2019,
    'sportIds': 1,
    'fields': 'teams,id,name,teamCode,fileCode,teamName,locationName,shortName',
}
teams_info = statsapi.get('teams', team_params)

# Map each team's fileCode (e.g. 'sf') to its numeric id.
teams_dict = {club['fileCode']: club['id'] for club in teams_info['teams']}
teams_dict

{'ana': 108,
 'ari': 109,
 'atl': 144,
 'bal': 110,
 'bos': 111,
 'chc': 112,
 'cin': 113,
 'cle': 114,
 'col': 115,
 'cws': 145,
 'det': 116,
 'hou': 117,
 'kc': 118,
 'la': 119,
 'mia': 146,
 'mil': 158,
 'min': 142,
 'nym': 121,
 'nyy': 147,
 'oak': 133,
 'phi': 143,
 'pit': 134,
 'sd': 135,
 'sea': 136,
 'sf': 137,
 'stl': 138,
 'tb': 139,
 'tex': 140,
 'tor': 141,
 'was': 120}

In [7]:
def get_player_list(team_code):
    """Return the player names on the roster of the team with the given file code.

    The roster comes back from statsapi.roster as text, one player per line.
    The final element after splitting on newlines is discarded, and on each
    remaining line the first two whitespace-separated tokens are dropped so
    that only the name is kept.
    """
    roster_text = statsapi.roster(teams_dict[team_code])
    roster_lines = roster_text.split("\n")[:-1]
    return [" ".join(line.split()[2:]) for line in roster_lines]

# Example: list the roster names for the team whose file code is "sf".
get_player_list("sf")

['Alex Dickerson',
 'Austin Slater',
 'Brandon Belt',
 'Brandon Crawford',
 'Buster Posey',
 'Derek Holland',
 'Donovan Solano',
 'Drew Pomeranz',
 'Jeff Samardzija',
 'Joe Panik',
 'Kevin Pillar',
 'Madison Bumgarner',
 'Mark Melancon',
 'Mike Yastrzemski',
 'Pablo Sandoval',
 'Reyes Moronta',
 'Sam Dyson',
 'Shaun Anderson',
 'Stephen Vogt',
 'Tony Watson',
 'Trevor Gott',
 'Tyler Austin',
 'Tyler Beede',
 'Will Smith',
 'Williams Jerez']

In [8]:
# Inspect the raw lookup result for one player name to see which fields are available.
statsapi.lookup_player("Trevor Gott")

[{'boxscoreName': 'Gott',
  'currentTeam': {'id': 137},
  'firstLastName': 'Trevor Gott',
  'firstName': 'Trevor',
  'fullFMLName': 'Trevor Vaughan Gott',
  'fullLFMName': 'Gott, Trevor Vaughan',
  'fullName': 'Trevor Gott',
  'id': 641627,
  'initLastName': 'T Gott',
  'lastFirstName': 'Gott, Trevor',
  'lastInitName': 'Gott, T',
  'lastName': 'Gott',
  'mlbDebutDate': '2015-06-14',
  'nameFirstLast': 'Trevor Gott',
  'primaryNumber': '58',
  'primaryPosition': {'abbreviation': 'P', 'code': '1'},
  'useName': 'Trevor'}]

In [15]:
def get_player_id_from_name(player_name):
    """Return the 'id' of the first statsapi.lookup_player match, or False if nothing matched."""
    try:
        first_match = statsapi.lookup_player(player_name)[0]
    except IndexError:
        # Empty result list: no player matched the name.
        return False
    else:
        return first_match['id']

def check_pos_player(player_name):
    """Return True when the first lookup match has a primary position other than "P".

    Returns False when the lookup yields no match at all.
    """
    try:
        first_match = statsapi.lookup_player(player_name)[0]
    except IndexError:
        # Empty result list: treat an unknown name as "not a position player".
        return False
    else:
        return first_match['primaryPosition']['abbreviation'] != "P"

def get_current_season_stats(player_name):
    """Return a dict of hitting stats for a position player, keyed by stat name.

    The dict starts with "Name" (the name passed in) and "ID" (the id of the
    first lookup match), followed by one float entry per "name: value" line in
    the last "Season Hitting" section of statsapi.player_stats' text output.

    Raises:
        ValueError: if the name matches no player, if the first match's primary
            position abbreviation is "P", or if a stat value cannot be
            converted with float().
    """
    # A single lookup serves both the position check and the id, instead of
    # hitting the API once for each.
    matches = statsapi.lookup_player(player_name)
    if not matches or matches[0]['primaryPosition']['abbreviation'] == "P":
        raise ValueError("Player name entered is not a position player")

    player_id = matches[0]['id']
    stats_dict = {"Name": player_name, "ID": player_id}

    # Look up the player's hitting stats (returned as formatted text)
    stats_text = statsapi.player_stats(player_id, 'hitting')

    # Keep only the text after the last "Season Hitting" header
    curr_season_stats = stats_text.split("Season Hitting")[-1]

    # Drop the remainder of the header line and the two trailing elements,
    # leaving the "name: value" lines.
    for line in curr_season_stats.split("\n")[1:-2]:
        # Split on the first colon so a stat name containing digits is kept
        # whole rather than being cut at the first non-letter.
        stat_name, separator, stat_val = line.partition(":")
        if not separator:
            # Not a "name: value" line; nothing to record.
            continue
        stats_dict[stat_name.strip()] = float(stat_val)
    return stats_dict

In [16]:
# Example: fetch the hitting-stats dict for one position player.
get_current_season_stats("Kevin Pillar")

{'ID': 607680,
 'Name': 'Kevin Pillar',
 'atBats': 360.0,
 'avg': 0.247,
 'babip': 0.258,
 'baseOnBalls': 11.0,
 'caughtStealing': 2.0,
 'doubles': 21.0,
 'gamesPlayed': 96.0,
 'groundIntoDoublePlay': 10.0,
 'groundOuts': 102.0,
 'groundOutsToAirouts': 0.86,
 'hitByPitch': 4.0,
 'hits': 89.0,
 'homeRuns': 12.0,
 'intentionalWalks': 3.0,
 'leftOnBase': 150.0,
 'numberOfPitches': 1234.0,
 'obp': 0.274,
 'ops': 0.686,
 'plateAppearances': 379.0,
 'rbi': 52.0,
 'runs': 50.0,
 'sacBunts': 0.0,
 'sacFlies': 4.0,
 'slg': 0.411,
 'stolenBasePercentage': 0.8,
 'stolenBases': 8.0,
 'strikeOuts': 54.0,
 'totalBases': 148.0,
 'triples': 1.0}

In [23]:
# Collect one list of stat values per player, across every team in teams_dict.
# Each list later becomes one row of the batting-stats DataFrame.

rows_list = []
for team in tqdm(teams_dict):
    player_list = get_player_list(team)
    for player in player_list:
        try:
            new_row = list(get_current_season_stats(player).values())
        except (ValueError, IndexError):
            # Skip players whose stats lookup raises either of these
            # (ValueError is what get_current_season_stats raises for non-position players).
            continue
        else:
            rows_list.append(new_row)

  0%|          | 0/30 [00:00<?, ?it/s]
  0%|          | 0/25 [00:00<?, ?it/s][A
  4%|▍         | 1/25 [00:00<00:10,  2.25it/s][A
  8%|▊         | 2/25 [00:00<00:10,  2.25it/s][A
 12%|█▏        | 3/25 [00:01<00:08,  2.56it/s][A
 16%|█▌        | 4/25 [00:02<00:11,  1.89it/s][A
 20%|██        | 5/25 [00:02<00:08,  2.32it/s][A
 24%|██▍       | 6/25 [00:03<00:11,  1.70it/s][A
 28%|██▊       | 7/25 [00:03<00:09,  1.99it/s][A
 32%|███▏      | 8/25 [00:04<00:10,  1.55it/s][A
 36%|███▌      | 9/25 [00:04<00:09,  1.68it/s][A
 40%|████      | 10/25 [00:05<00:08,  1.76it/s][A
 44%|████▍     | 11/25 [00:08<00:16,  1.17s/it][A
 48%|████▊     | 12/25 [00:09<00:15,  1.20s/it][A
 52%|█████▏    | 13/25 [00:10<00:13,  1.11s/it][A
 56%|█████▌    | 14/25 [00:10<00:10,  1.08it/s][A
 60%|██████    | 15/25 [00:10<00:07,  1.37it/s][A
 64%|██████▍   | 16/25 [00:11<00:07,  1.25it/s][A
 68%|██████▊   | 17/25 [00:13<00:09,  1.14s/it][A
 72%|███████▏  | 18/25 [00:14<00:07,  1.04s/it][A
 76%|█████

ValueError: Shape of passed values is (31, 275), indices imply (31, 31)

In [28]:
# Column names come from the keys of one known player's stats dict
# (this makes another API call through get_current_season_stats).
stat_columns = list(get_current_season_stats("Kevin Pillar").keys())

# Rows are plain value lists with no keys attached, so a row of a different
# length would be silently padded or misaligned; fail loudly instead.
if any(len(row) != len(stat_columns) for row in rows_list):
    raise ValueError("Every row in rows_list must have exactly {} values".format(len(stat_columns)))

# Pass the rows to pandas directly: routing them through np.array first would
# coerce each mixed str/number row to strings and lose the numeric dtypes.
player_stats_table = pd.DataFrame(rows_list, columns=stat_columns)
player_stats_table.head()

Unnamed: 0,Name,ID,gamesPlayed,groundOuts,runs,doubles,triples,homeRuns,strikeOuts,baseOnBalls,...,groundIntoDoublePlay,numberOfPitches,plateAppearances,totalBases,rbi,leftOnBase,sacBunts,sacFlies,babip,groundOutsToAirouts
0,Chad Pinder,640461,69.0,65.0,30.0,12.0,0.0,8.0,47.0,9.0,...,7.0,778.0,219.0,86.0,28.0,98.0,0.0,2.0,0.278,1.48
1,Franklin Barreto,620439,10.0,11.0,5.0,1.0,0.0,2.0,13.0,1.0,...,0.0,130.0,34.0,12.0,4.0,17.0,0.0,0.0,0.167,2.75
2,Jurickson Profar,595777,83.0,86.0,38.0,14.0,1.0,13.0,52.0,21.0,...,6.0,1264.0,328.0,120.0,45.0,131.0,0.0,1.0,0.22,0.88
3,Marcus Semien,543760,97.0,111.0,66.0,24.0,3.0,14.0,59.0,48.0,...,9.0,1796.0,447.0,180.0,49.0,110.0,0.0,1.0,0.288,0.92
4,Mark Canha,592192,63.0,41.0,40.0,8.0,1.0,15.0,44.0,34.0,...,5.0,953.0,223.0,101.0,26.0,71.0,0.0,2.0,0.25,0.79


In [29]:
# Persist the stats table as CSV; the path is relative, so the file lands in the working directory.
player_stats_table.to_csv("player_stats_7_18_19.csv")

In [106]:
# These functions were defined with the help of toddrob99 on github, who developed the
# MLB-StatsAPI module. I made a post on reddit.com/r/mlbdata, which he maintains specifically to 
# answer questions about making API calls for specific purposes. I asked how to get stats
# over the past x days and how to get head-to-head batting stats. The post is linked
# here: https://www.reddit.com/r/mlbdata/comments/cewwfo/getting_headtohead_batting_stats_and_last_x_games/?

def batting_past_N_days(N, player_id, end_date=None):
    """Return a player's rate-style batting stats over the N days ending at end_date.

    Args:
        N: length of the window in days; also embedded in the output key suffix.
        player_id: value sent as 'personId' to the 'person' endpoint.
        end_date: datetime marking the end of the window. Defaults to the
            current date/time at the moment of the call.

    Returns:
        dict mapping "<stat>_p<N>d" to a float, for every stat the API returned
        as a string (except 'stolenBasePercentage'), plus 'hits'.

    Raises:
        IndexError / KeyError: if the response lacks the expected people/stats/splits
            entries.
        ValueError: if a selected stat value cannot be converted with float().
    """
    # Resolve "today" per call. A datetime.today() default in the signature is
    # evaluated once when the function is defined, so it goes stale in a
    # long-running kernel.
    if end_date is None:
        end_date = datetime.datetime.today()

    start_str = (end_date - datetime.timedelta(days=N)).strftime("%m/%d/%Y")
    end_str = end_date.strftime("%m/%d/%Y")
    hydrate = ('stats(group=[hitting],type=[byDateRange],startDate={},endDate={}),currentTeam'
               .format(start_str, end_str))

    params = {'personId': player_id, 'hydrate': hydrate}
    r = statsapi.get('person', params)
    batting_stats = r['people'][0]['stats'][0]['splits'][0]['stat']

    # Only get rate stats for past N days: those are the string-valued entries.
    # 'hits' is kept as well even though it is not a string. The parentheses
    # spell out the grouping the original and/or chain relied on implicitly.
    suffix = "_p{}d".format(N)
    filtered = {k + suffix: float(v) for k, v in batting_stats.items()
                if k == 'hits'
                or (isinstance(v, str) and k != 'stolenBasePercentage')}

    return filtered

def get_h2h_vs_pitcher(batter_id, opponent_id, season=2019):
    """Return a batter's rate-style hitting stats against one opposing player.

    Args:
        batter_id: value sent as 'personId' to the 'person' endpoint.
        opponent_id: value sent as 'opposingPlayerId' in the hydrate string.
        season: season placed in the hydrate string (defaults to 2019, the
            value that used to be hard-coded).

    Returns:
        dict mapping "<stat>_h2h" to a float, for every stat the API returned
        as a string (except 'stolenBasePercentage'), plus 'hits'.

    Raises:
        IndexError / KeyError: if the response lacks the expected entries.
        ValueError: if a selected stat value cannot be converted with float().
    """
    hydrate = ('stats(group=[hitting],type=[vsPlayer],opposingPlayerId={},season={},sportId=1)'
               .format(opponent_id, season))
    params = {'personId': batter_id, 'hydrate': hydrate, 'sportId': 1}
    r = statsapi.get('person', params)

    # The second entry of the returned stats list is the one used here.
    # NOTE(review): presumably that entry holds the totals against this
    # opponent rather than a per-season split -- confirm against the API.
    batting_stats = r['people'][0]['stats'][1]['splits'][0]['stat']

    # Only get rate stats vs pitcher: those are the string-valued entries.
    # 'hits' is kept as well even though it is not a string. The parentheses
    # spell out the grouping the original and/or chain relied on implicitly.
    filtered = {k + "_h2h": float(v) for k, v in batting_stats.items()
                if k == 'hits'
                or (isinstance(v, str) and k != 'stolenBasePercentage')}

    return filtered

In [107]:
# Example: Buster Posey's batting over the past 22 days (the name is resolved to an id first).
batting_past_N_days(22, get_player_id_from_name("Buster Posey"))

{'avg_p22d': 0.367,
 'hits_p22d': 18.0,
 'obp_p22d': 0.426,
 'ops_p22d': 0.977,
 'slg_p22d': 0.551}

In [108]:
# Example: Buster Posey's head-to-head numbers against Clayton Kershaw (both names resolved to ids).
get_h2h_vs_pitcher(get_player_id_from_name("Buster Posey"), get_player_id_from_name("Clayton Kershaw"))

{'avg_h2h': 0.225,
 'hits_h2h': 25.0,
 'obp_h2h': 0.271,
 'ops_h2h': 0.604,
 'slg_h2h': 0.333}

In [43]:
def pitching_past_N_days(N, player_id, end_date=None):
    """Return the raw pitching stat dict for the N days ending at end_date.

    Args:
        N: length of the window in days.
        player_id: value sent as 'personId' to the 'person' endpoint.
        end_date: datetime marking the end of the window. Defaults to the
            current date/time at the moment of the call.

    Returns:
        The 'stat' dict of the first split of the first stats entry, exactly as
        the API returned it (no filtering or type conversion).

    Raises:
        IndexError / KeyError: if the response lacks the expected entries.
    """
    # Resolve "today" per call. A datetime.today() default in the signature is
    # evaluated once when the function is defined, so it goes stale in a
    # long-running kernel.
    if end_date is None:
        end_date = datetime.datetime.today()

    start_str = (end_date - datetime.timedelta(days=N)).strftime("%m/%d/%Y")
    end_str = end_date.strftime("%m/%d/%Y")
    hydrate = ('stats(group=[pitching],type=[byDateRange],startDate={},endDate={}),currentTeam'
               .format(start_str, end_str))

    params = {'personId': player_id, 'hydrate': hydrate}
    r = statsapi.get('person', params)

    return r['people'][0]['stats'][0]['splits'][0]['stat']

In [45]:
# Example: Wade LeBlanc's pitching stats over the past 30 days (the name is resolved to an id first).
pitching_past_N_days(30, get_player_id_from_name("Wade LeBlanc"))

{'airOuts': 26,
 'atBats': 103,
 'avg': '.252',
 'balks': 0,
 'baseOnBalls': 7,
 'battersFaced': 110,
 'blownSaves': 0,
 'catchersInterference': 0,
 'caughtStealing': 0,
 'completeGames': 0,
 'doubles': 2,
 'earnedRuns': 10,
 'era': '3.38',
 'gamesPitched': 5,
 'gamesPlayed': 5,
 'gamesStarted': 1,
 'groundIntoDoublePlay': 2,
 'groundOuts': 30,
 'groundOutsToAirouts': '0.71',
 'hitBatsmen': 0,
 'hits': 26,
 'hitsPer9Inn': '8.78',
 'holds': 0,
 'homeRuns': 6,
 'homeRunsPer9': '2.06',
 'inheritedRunners': 5,
 'inheritedRunnersScored': 4,
 'inningsPitched': '26.2',
 'intentionalWalks': 1,
 'losses': 1,
 'numberOfPitches': 432,
 'outs': 80,
 'pickoffs': 0,
 'pitchesPerInning': '16.20',
 'runs': 10,
 'runsScoredPer9': '3.44',
 'sacBunts': 0,
 'sacFlies': 0,
 'saveOpportunities': 0,
 'saves': 0,
 'shutouts': 0,
 'stolenBasePercentage': '1.000',
 'stolenBases': 2,
 'strikeOuts': 21,
 'strikePercentage': '.650',
 'strikeoutWalkRatio': '3.00',
 'strikeoutsPer9Inn': '7.09',
 'strikes': 281,
 'tr

In [57]:
def check_pitcher_right_handed(pitcher_id):
    """Return True when the person's 'pitchHand' code is 'R'.

    Any other code yields False, as does an IndexError while reading the
    response (for example an empty 'people' list).
    """
    try:
        response = statsapi.get('person', {'personId': pitcher_id})
        hand_code = response['people'][0]['pitchHand']['code']
    except IndexError:
        return False
    return hand_code == 'R'

In [58]:
# Example: check whether Lance Lynn's pitching hand code is 'R'.
check_pitcher_right_handed(get_player_id_from_name("Lance Lynn"))

True

In [68]:
def check_batter_right_handed(batter_id):
    """Return True when the person's 'batSide' code is 'R'.

    Any other code yields False, as does an IndexError while reading the
    response (for example an empty 'people' list).
    """
    try:
        response = statsapi.get('person', {'personId': batter_id})
        side_code = response['people'][0]['batSide']['code']
    except IndexError:
        return False
    return side_code == 'R'

In [69]:
# Example: check whether Bryce Harper's batting side code is 'R'.
check_batter_right_handed(get_player_id_from_name("Bryce Harper"))

False

In [70]:
def check_pitcher_batter_opposite_hand(batter_id, pitcher_id):
    """Return True when exactly one of the pitcher and the batter is reported as right-handed.

    Handedness comes from check_pitcher_right_handed and
    check_batter_right_handed; the pitcher is checked first.
    """
    pitcher_is_righty = check_pitcher_right_handed(pitcher_id)
    batter_is_righty = check_batter_right_handed(batter_id)
    return pitcher_is_righty != batter_is_righty

In [72]:
# Example: compare Bryce Harper's batting side with Jacob DeGrom's pitching hand.
check_pitcher_batter_opposite_hand(get_player_id_from_name("Bryce Harper"), get_player_id_from_name("Jacob DeGrom"))

True

In [110]:
# Ask statsapi.last_game for a game id for the 'sf' team.
statsapi.last_game(teams_dict['sf'])

566518

In [141]:
# Drill into the schedule response for one gamePk to reach the home side's probable pitcher entry.
statsapi.get('schedule', {'sportId': '1', 'gamePk': 566516, 'hydrate':'probablePitcher'})['dates'][0]['games'][0]['teams']['home']['probablePitcher']

{'fullName': 'Samardzija, Jeff', 'id': 502188, 'link': '/api/v1/people/502188'}

In [137]:
# Inspect the raw lookup result for Buster Posey (its 'currentTeam' id is used by get_opposing_pitcher).
statsapi.lookup_player("Buster Posey")

[{'boxscoreName': 'Posey',
  'currentTeam': {'id': 137},
  'firstLastName': 'Buster Posey',
  'firstName': 'Gerald',
  'fullFMLName': 'Gerald Dempsey Posey',
  'fullLFMName': 'Posey, Gerald Dempsey',
  'fullName': 'Buster Posey',
  'id': 457763,
  'initLastName': 'B Posey',
  'lastFirstName': 'Posey, Buster',
  'lastInitName': 'Posey, B',
  'lastName': 'Posey',
  'mlbDebutDate': '2009-09-11',
  'nameFirstLast': 'Buster Posey',
  'primaryNumber': '28',
  'primaryPosition': {'abbreviation': 'C', 'code': '2'},
  'useName': 'Buster'}]

In [142]:
def get_opposing_pitcher(player_id, game_id):
    """Return the full name of the probable pitcher the player would face in a game.

    Args:
        player_id: value passed to statsapi.lookup_player to find the player's
            current team id.
        game_id: value sent as 'gamePk' to the 'schedule' endpoint.

    Returns:
        The 'fullName' of the away side's probable pitcher when the player's
        current team id equals the home team's id; otherwise the home side's.
        NOTE(review): a player whose current team is neither side of this game
        therefore also gets the home pitcher -- confirm that is acceptable.

    Raises:
        IndexError: if the schedule response or the player lookup is empty.
        KeyError: if the opposing side has no 'probablePitcher' entry.
    """
    schedule = statsapi.get('schedule', {'sportId': '1', 'gamePk': game_id,
                                         'hydrate': 'probablePitcher'})
    teams = schedule['dates'][0]['games'][0]['teams']

    player_team_id = statsapi.lookup_player(player_id)[0]['currentTeam']['id']
    opposing_side = 'away' if player_team_id == teams['home']['team']['id'] else 'home'

    # Read only the opposing side's pitcher, so a missing probablePitcher on
    # the player's own side does not raise.
    return teams[opposing_side]['probablePitcher']['fullName']

In [144]:
# Example: opposing probable pitcher for player id 457763 in game 566516.
get_opposing_pitcher(457763, 566516)

'Flaherty, Jack'

In [145]:
# Fetch the schedule entries for 07/07/2019 to inspect the fields each game dict carries.
statsapi.schedule('07/07/2019')

[{'away_id': 110,
  'away_name': 'Baltimore Orioles',
  'away_pitcher_note': 'The Orioles changed their plans on Saturday, opting for Wojciechowski  in this start instead of Gabriel Ynoa. Wojciechowski has made one start since joining Baltimore, but posted a 3.61 ERA over 15 starts in Triple-A with Cleveland earlier this season.',
  'away_probable_pitcher': 'Wojciechowski, Asher',
  'away_score': 1,
  'current_inning': 9,
  'doubleheader': 'N',
  'game_date': '2019-07-07',
  'game_datetime': '2019-07-07T17:07:00Z',
  'game_id': 566911,
  'game_num': 1,
  'game_type': 'R',
  'home_id': 141,
  'home_name': 'Toronto Blue Jays',
  'home_pitcher_note': 'Thornton will take the mound in the team’s final game before the All-Star break. The 25-year-old right-hander is coming off the worst start of his career, in which he allowed seven runs on 11 hits over 2 2/3 innings against the Red Sox on Tuesday. He will be looking for a better outing against Baltimore.',
  'home_probable_pitcher': 'Thornto