In [114]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsapi
import json
import re
import datetime

from tqdm import tqdm
from collections import OrderedDict

%matplotlib inline

In [115]:
def generate_teams_dict(season=2019):
    """Map each team's file code to its numeric id.

    Queries the statsapi ``teams`` endpoint (activeStatus 'Y', sportIds 1).

    Args:
        season: Season year sent with the request. Defaults to 2019, the
            value that used to be hard-coded.

    Returns:
        dict: ``{fileCode: id}`` for every entry of the response's ``teams`` list.
    """
    team_params = {
        'activeStatus': 'Y',
        'season': season,
        'sportIds': 1,
        # Restrict the response to the listed fields.
        'fields': 'teams,id,name,teamCode,fileCode,teamName,locationName,shortName,venue',
    }
    teams_info = statsapi.get('teams', team_params)
    return {team['fileCode']: team['id'] for team in teams_info['teams']}

In [116]:
def get_yesterdays_games():
    """Return the schedule for yesterday's local date.

    The date is formatted as MM/DD/YYYY and passed to ``statsapi.schedule``.
    Previously only the formatted date string was returned, so callers that
    iterate over the result as a list of games could not work.

    Returns:
        Whatever ``statsapi.schedule`` returns for yesterday's date.
    """
    yesterday = datetime.datetime.today() - datetime.timedelta(days=1)
    return statsapi.schedule(yesterday.strftime("%m/%d/%Y"))

In [117]:
def get_todays_games():
    """Return what ``statsapi.schedule`` reports for today's local date (MM/DD/YYYY)."""
    now = datetime.datetime.today()
    date_text = now.strftime("%m/%d/%Y")
    return statsapi.schedule(date_text)

def matchups_and_prob_pitchers_dicts(day='today'):
    """Build matchup and probable-pitcher lookups from one day's schedule.

    Args:
        day: ``'yesterday'`` selects ``get_yesterdays_games()``; any other
            value (default ``'today'``) selects ``get_todays_games()``.

    Returns:
        tuple: ``(home_away_dict, away_home_dict, prob_pitchers_dict)`` where
        ``home_away_dict`` maps home team id -> away team id,
        ``away_home_dict`` is its inverse, and ``prob_pitchers_dict`` maps a
        team id to that team's probable pitcher as given by the schedule.
        Because the dicts are keyed by team id, a later game involving the
        same team overwrites an earlier one.
    """
    # Fetch only the schedule that was asked for; today's schedule used to be
    # requested unconditionally and then discarded when day == 'yesterday'.
    if day == 'yesterday':
        games = get_yesterdays_games()
    else:
        games = get_todays_games()

    home_away_dict = {}
    prob_pitchers_dict = {}
    for game in games:
        away_id, home_id = game['away_id'], game['home_id']
        prob_pitchers_dict[away_id] = game['away_probable_pitcher']
        prob_pitchers_dict[home_id] = game['home_probable_pitcher']
        home_away_dict[home_id] = away_id

    away_home_dict = {away: home for home, away in home_away_dict.items()}
    return home_away_dict, away_home_dict, prob_pitchers_dict

In [118]:
def get_player_list(team_id):
    """Return the player names parsed from the text roster of ``team_id``.

    ``statsapi.roster`` returns one player per line; the first two
    whitespace-separated tokens of each line are dropped and the remaining
    tokens are re-joined with single spaces to form the name. The final
    element produced by splitting on newlines is discarded.
    """
    roster_lines = statsapi.roster(team_id).split("\n")
    roster_lines.pop()  # drop whatever follows the last newline
    return [" ".join(line.split()[2:]) for line in roster_lines]

# Spot-check the roster parser on team id 137.
get_player_list(137)

['Alex Dickerson',
 'Austin Slater',
 'Brandon Belt',
 'Brandon Crawford',
 'Buster Posey',
 'Donovan Solano',
 'Drew Pomeranz',
 'Jeff Samardzija',
 'Joe Panik',
 'Kevin Pillar',
 'Madison Bumgarner',
 'Mark Melancon',
 'Mike Yastrzemski',
 'Pablo Sandoval',
 'Reyes Moronta',
 'Sam Coonrod',
 'Sam Dyson',
 'Shaun Anderson',
 'Stephen Vogt',
 'Tony Watson',
 'Trevor Gott',
 'Tyler Austin',
 'Tyler Beede',
 'Will Smith',
 'Zach Green']

In [119]:
def get_player_id_from_name(player_name):
    """Return the ``id`` of the first ``statsapi.lookup_player`` match.

    Returns ``False`` when the lookup produces no match (IndexError).
    """
    try:
        matches = statsapi.lookup_player(player_name)
        player_id = matches[0]['id']
    except IndexError:
        return False
    return player_id

def check_pos_player(player_name):
    """Tell whether the first lookup match is a non-pitcher.

    Returns ``True`` when the match's primary position abbreviation is
    anything other than ``"P"``, and ``False`` when it is ``"P"`` or when the
    lookup produces no match (IndexError).
    """
    try:
        primary = statsapi.lookup_player(player_name)[0]['primaryPosition']
        is_pitcher = primary['abbreviation'] == "P"
    except IndexError:
        return False
    return not is_pitcher

def get_current_season_stats(player_name):
    """Collect a position player's latest-season hitting stats.

    Args:
        player_name: Name passed to ``statsapi.lookup_player``; the first
            match is used.

    Returns:
        OrderedDict: ``Name``, ``ID`` and ``Team`` (the match's
        ``currentTeam`` id) followed by one float per ``name: value`` line of
        the last "Season Hitting" section of ``statsapi.player_stats``.
        Values that cannot be parsed as floats are stored as ``0.0``.

    Raises:
        ValueError: If the lookup finds nobody, or the first match's primary
            position abbreviation is ``"P"``.
    """
    # One lookup serves the position check, the id and the team; the same
    # search used to be sent three times per player.
    matches = statsapi.lookup_player(player_name)
    if not matches or matches[0]['primaryPosition']['abbreviation'] == "P":
        raise ValueError("Player name entered is not a position player")
    player = matches[0]

    stats_dict = OrderedDict([("Name", player_name),
                              ("ID", player['id']),
                              ("Team", player['currentTeam']['id'])])

    # Look up the player's hitting stats and keep only the text that follows
    # the last "Season Hitting" header, i.e. the most recent season.
    stats_text = statsapi.player_stats(player['id'], 'hitting')
    curr_season_stats = stats_text.split("Season Hitting")[-1]

    for line in curr_season_stats.splitlines():
        # Split on the first colon so names containing digits stay intact,
        # and skip blank/non-stat lines instead of crashing on them.
        stat_name, separator, stat_val = line.partition(":")
        stat_name = stat_name.strip()
        if not separator or not stat_name:
            continue
        try:
            stats_dict[stat_name] = float(stat_val)
        except ValueError:
            # Placeholders for unavailable values are not numeric.
            stats_dict[stat_name] = 0.0
    return stats_dict

In [122]:
# These functions were defined with the help of toddrob99 on github, who developed the
# MLB-StatsAPI module. I made a post on reddit.com/r/mlbdata, which he maintains to
# answer questions about making API calls for specific purposes. I asked how to get stats
# over the past x days and how to get head-to-head batting stats. The post is linked
# here: https://www.reddit.com/r/mlbdata/comments/cewwfo/getting_headtohead_batting_stats_and_last_x_games/?

def batting_past_N_days(N, player_id, end_date=None):
    """Return a player's batting rate stats over the ``N`` days ending at ``end_date``.

    Args:
        N: Number of days to look back from ``end_date``.
        player_id: Person id sent to the ``person`` endpoint.
        end_date: ``datetime`` closing the window. ``None`` (default) means
            "now", resolved on every call.

    Returns:
        OrderedDict: key-sorted mapping ``<stat>_p<N>d -> float`` holding the
        string-valued stats (except ``stolenBasePercentage``) plus ``hits``.

    Raises:
        ValueError: If a kept value cannot be converted to ``float``.
        KeyError/IndexError: If the response lacks the expected stats/splits.
    """
    # A default of datetime.datetime.today() in the signature is evaluated
    # once, when the cell defining the function runs, and then goes stale in
    # a long-lived kernel; resolve it at call time instead.
    if end_date is None:
        end_date = datetime.datetime.today()

    start_date = (end_date - datetime.timedelta(days=N)).strftime("%m/%d/%Y")
    end_date = end_date.strftime("%m/%d/%Y")
    hydrate = ('stats(group=[hitting],type=[byDateRange],startDate={},endDate={}),currentTeam'.
               format(start_date, end_date))

    params = {'personId': player_id, 'hydrate': hydrate}
    r = statsapi.get('person', params)
    batting_stats = r['people'][0]['stats'][0]['splits'][0]['stat']

    # Only get rate stats for past N days. Rate stats arrive as strings;
    # 'hits' is kept as well even though it is not a string.
    filtered = {k + "_p{}d".format(N): float(v) for k, v in batting_stats.items()
                if (type(v) == str and k != 'stolenBasePercentage') or k == 'hits'}
    return OrderedDict(sorted(filtered.items()))

def get_h2h_vs_pitcher(batter_id, opponent_id, season=2019):
    """Return a batter's head-to-head rate stats against one opposing player.

    Args:
        batter_id: Person id of the batter.
        opponent_id: Person id passed as ``opposingPlayerId``.
        season: Season placed in the hydrate string. Defaults to 2019, the
            value that used to be hard-coded.

    Returns:
        OrderedDict: key-sorted mapping ``<stat>_h2h -> float`` holding the
        string-valued stats (except ``stolenBasePercentage``) plus ``hits``
        and ``atBats``. When the response carries no head-to-head split, a
        six-key all-zero mapping is returned instead.

    Raises:
        ValueError: If a kept value cannot be converted to ``float``.
    """
    hydrate = ('stats(group=[hitting],type=[vsPlayer],opposingPlayerId={},season={},sportId=1)'
               .format(opponent_id, season))
    params = {'personId': batter_id, 'hydrate': hydrate, 'sportId': 1}
    r = statsapi.get('person', params)

    try:
        batting_stats = r['people'][0]['stats'][1]['splits'][0]['stat']
    except (KeyError, IndexError):
        # No head-to-head data: either the 'stats' key is missing or one of
        # the lists is too short/empty. Both mean "never faced", so both get
        # the same zero row (IndexError used to escape to the caller).
        return OrderedDict([('atBats_h2h', 0.0), ('avg_h2h', 0.0), ('hits_h2h', 0.0),
                            ('obp_h2h', 0.0), ('ops_h2h', 0.0), ('slg_h2h', 0.0)])

    # Only get rate stats vs pitcher. Rate stats arrive as strings; the
    # 'hits' and 'atBats' counts are kept as well.
    filtered = {k + "_h2h": float(v) for k, v in batting_stats.items()
                if k in ('hits', 'atBats')
                or (type(v) == str and k != 'stolenBasePercentage')}
    return OrderedDict(sorted(filtered.items()))

In [123]:
def pitching_past_N_days(N, player_id, end_date=None):
    """Return a pitcher's rate stats over the ``N`` days ending at ``end_date``.

    Args:
        N: Number of days to look back from ``end_date``.
        player_id: Person id sent to the ``person`` endpoint.
        end_date: ``datetime`` closing the window. ``None`` (default) means
            "now", resolved on every call.

    Returns:
        dict: ``<stat>_p<N>d -> float`` for every string-valued stat whose
        value is not the ``".---"`` placeholder.

    Raises:
        ValueError: If a kept value cannot be converted to ``float``.
        KeyError/IndexError: If the response lacks the expected stats/splits.
    """
    # A default of datetime.datetime.today() in the signature is evaluated
    # once, when the cell defining the function runs, and then goes stale in
    # a long-lived kernel; resolve it at call time instead.
    if end_date is None:
        end_date = datetime.datetime.today()

    start_date = (end_date - datetime.timedelta(days=N)).strftime("%m/%d/%Y")
    end_date = end_date.strftime("%m/%d/%Y")
    hydrate = 'stats(group=[pitching],type=[byDateRange],startDate={},endDate={}),currentTeam'.format(start_date, end_date)

    params = {'personId': player_id, 'hydrate': hydrate}
    r = statsapi.get('person', params)
    pitching_stats = r['people'][0]['stats'][0]['splits'][0]['stat']

    # Only get rate stats for past N days (they arrive as strings).
    filtered = {k + "_p{}d".format(N): float(v) for k, v in pitching_stats.items()
                if type(v) == str and v != ".---"}

    return filtered

In [124]:
def check_pitcher_right_handed(pitcher_id):
    """Return ``True`` when the person's ``pitchHand`` code is ``'R'``.

    Returns ``False`` for any other code, or when the ``person`` lookup
    yields no entry (IndexError).
    """
    try:
        response = statsapi.get('person', {'personId': pitcher_id})
        throwing_hand = response['people'][0]['pitchHand']['code']
    except IndexError:
        return False
    return throwing_hand == 'R'

In [125]:
def check_batter_right_handed(batter_id):
    """Return ``True`` when the person's ``batSide`` code is ``'R'``.

    Returns ``False`` for any other code, or when the ``person`` lookup
    yields no entry (IndexError).
    """
    try:
        response = statsapi.get('person', {'personId': batter_id})
        batting_side = response['people'][0]['batSide']['code']
    except IndexError:
        return False
    return batting_side == 'R'

In [126]:
def check_pitcher_batter_opposite_hand(batter_id, pitcher_id):
    """Return ``True`` when exactly one of pitcher and batter is flagged right-handed.

    Uses ``check_pitcher_right_handed`` and ``check_batter_right_handed``;
    any non-'R' result from either counts as "not right-handed".
    """
    pitcher_is_righty = check_pitcher_right_handed(pitcher_id)
    batter_is_righty = check_batter_right_handed(batter_id)
    return pitcher_is_righty != batter_is_righty

In [127]:
def player_got_hit_in_game(player_id, game_id, home_or_away):
    """Report whether a player is credited with at least one hit in a game.

    Args:
        player_id: Person id; looked up in the boxscore as ``'ID<player_id>'``.
        game_id: ``gamePk`` of the game.
        home_or_away: Key of the boxscore side to search (the callers in this
            notebook pass ``'home'`` or ``'away'``).

    Returns:
        bool: ``True`` when the player's batting ``hits`` is greater than 0;
        ``False`` when it is 0 or absent, or when the player has no entry on
        that side of the boxscore.
    """
    fields = 'gameData,teams,teamName,shortName,teamStats,batting,atBats,runs,hits,rbi,strikeOuts,baseOnBalls,leftOnBase,players,boxscoreName,liveData,boxscore,teams,players,id,fullName,batting,avg,ops,era,battingOrder,info,title,fieldList,note,label,value'
    response = statsapi.get('game', {'gamePk': game_id, 'fields': fields})

    side_players = response['liveData']['boxscore']['teams'][home_or_away]['players']
    entry = side_players.get('ID' + str(player_id), False)
    if entry:
        return entry['stats']['batting'].get('hits', 0) > 0
    return False

In [128]:
def convert_to_FL_format(name):
    """Turn a comma-separated ``"Last, First"`` name into ``"First Last"``.

    The comma-separated pieces are reversed and joined with single spaces;
    only the piece that ends up first is stripped of surrounding whitespace.
    A name without a comma comes back stripped but otherwise unchanged.
    """
    pieces = name.split(",")[::-1]
    return " ".join([pieces[0].strip()] + pieces[1:])

In [129]:
def get_opposing_pitcher(player_id, game_id):
    """Return the probable pitcher's ``fullName`` for the team facing the player.

    Args:
        player_id: Value passed to ``statsapi.lookup_player``; the first
            match's ``currentTeam`` id decides which side the player is on.
        game_id: ``gamePk`` of the game in the schedule request.

    Returns:
        The away side's probable pitcher when the player's current team is
        the home team, otherwise the home side's probable pitcher.

    Raises:
        KeyError: If the opposing side has no ``probablePitcher`` entry.
        IndexError: If the schedule or the player lookup comes back empty.
    """
    schedule = statsapi.get('schedule', {'sportId': '1', 'gamePk': game_id,
                                         'hydrate': 'probablePitcher'})
    teams = schedule['dates'][0]['games'][0]['teams']

    players_team_id = statsapi.lookup_player(player_id)[0]['currentTeam']['id']
    opposing_side = 'away' if players_team_id == teams['home']['team']['id'] else 'home'

    # Read only the opposing side's pitcher: a missing probable pitcher on the
    # player's own team must not prevent answering the question.
    return teams[opposing_side]['probablePitcher']['fullName']

In [130]:
def batting_past_N_games(N, player_id):
    """Return a player's batting rate stats over their last ``N`` games.

    Returns:
        OrderedDict: key-sorted mapping ``<stat>_p<N>G -> float`` holding the
        string-valued stats (except ``stolenBasePercentage``) plus ``hits``.
        A kept value that is not numeric raises ``ValueError``.
    """
    suffix = "_p{}G".format(N)
    response = statsapi.get('person', {
        'personId': player_id,
        'hydrate': 'stats(group=[hitting],type=[lastXGames],limit={}),currentTeam'.format(N),
    })
    stat_line = response['people'][0]['stats'][0]['splits'][0]['stat']

    # Rate stats arrive as strings; 'hits' is kept as well even though it is not.
    kept = {}
    for stat_name, value in stat_line.items():
        is_rate_stat = type(value) is str and stat_name != 'stolenBasePercentage'
        if is_rate_stat or stat_name == 'hits':
            kept[stat_name + suffix] = float(value)

    ordered = OrderedDict()
    for column in sorted(kept):
        ordered[column] = kept[column]
    return ordered

def pitching_past_N_games(N, player_id):
    """Return a pitcher's rate stats over their last ``N`` games.

    Returns:
        OrderedDict: key-sorted mapping ``<stat>_p<N>G -> float`` for every
        string-valued stat; the ``'.---'`` placeholder is stored as ``0.0``.
    """
    suffix = "_p{}G".format(N)
    response = statsapi.get('person', {
        'personId': player_id,
        'hydrate': 'stats(group=[pitching],type=[lastXGames],limit={}),currentTeam'.format(N),
    })
    stat_line = response['people'][0]['stats'][0]['splits'][0]['stat']

    # Rate stats arrive as strings; everything else is skipped.
    kept = {}
    for stat_name, value in stat_line.items():
        if type(value) is not str:
            continue
        kept[stat_name + suffix] = 0.0 if value == '.---' else float(value)

    ordered = OrderedDict()
    for column in sorted(kept):
        ordered[column] = kept[column]
    return ordered

In [136]:
# This cell generates rows for a DataFrame of batting stats--one row per player

yesterday = (datetime.datetime.today() - datetime.timedelta(days = 1)).strftime("%m/%d/%Y")
today = datetime.datetime.today().strftime("%m/%d/%Y")


def _batter_rows(player_names, opp_pitcher_id, opp_pitcher_p5G, game_id, home_or_away):
    """Build one feature row per usable batter on one side of a game.

    Args:
        player_names: Roster names for the batting side.
        opp_pitcher_id: Id of the probable pitcher these batters face.
        opp_pitcher_p5G: That pitcher's last-5-games stats (already fetched).
        game_id: gamePk of the game.
        home_or_away: 'home' or 'away', the batting side's boxscore key.

    Returns:
        list: One row per player, in column order: season stats, last-7 and
        last-15 game batting stats, opposing pitcher stats, head-to-head
        stats, opposite-hand flag, got-a-hit label.
    """
    rows = []
    for player in player_names:
        player_id = get_player_id_from_name(player)
        try:
            new_row = list(get_current_season_stats(player).values())
            new_row += list(batting_past_N_games(7, player_id).values())
            new_row += list(batting_past_N_games(15, player_id).values())
            new_row += list(opp_pitcher_p5G.values())
            new_row += list(get_h2h_vs_pitcher(player_id, opp_pitcher_id).values())
            new_row.append(float(check_pitcher_batter_opposite_hand(batter_id=player_id,
                                                                  pitcher_id=opp_pitcher_id)))
            new_row.append(player_got_hit_in_game(player_id, game_id, home_or_away))
        except (ValueError, IndexError):
            # get_current_season_stats raises ValueError for anyone who is not
            # a position player; the stat lookups raise when a value is not
            # numeric or an expected split is missing. Such players are skipped.
            continue
        rows.append(new_row)
    return rows


rows_list = []
for game in tqdm(statsapi.schedule(today)):

    # Skip games that are neither under way nor finished.
    if game['status'] not in ['In Progress', 'Final']:
        continue

    game_id = game['game_id']
    away_id = game['away_id']
    home_id = game['home_id']
    home_player_list = get_player_list(home_id)
    away_player_list = get_player_list(away_id)

    # The schedule lists probable pitchers as "Last, First"; the name lookup
    # is fed "First Last".
    away_prob_Pname = convert_to_FL_format(game['away_probable_pitcher'])
    home_prob_Pname = convert_to_FL_format(game['home_probable_pitcher'])

    away_probable_pitcher = get_player_id_from_name(away_prob_Pname)
    home_probable_pitcher = get_player_id_from_name(home_prob_Pname)

    # Fetched once per game and reused for every batter facing that pitcher.
    away_pitcher_p5G = pitching_past_N_games(5, away_probable_pitcher)
    home_pitcher_p5G = pitching_past_N_games(5, home_probable_pitcher)

    # Home batters face the away pitcher and vice versa. The away batters'
    # handedness flag used to be computed against their own (away) pitcher.
    rows_list += _batter_rows(home_player_list, away_probable_pitcher,
                              away_pitcher_p5G, game_id, 'home')
    rows_list += _batter_rows(away_player_list, home_probable_pitcher,
                              home_pitcher_p5G, game_id, 'away')



  0%|          | 0/9 [00:00<?, ?it/s][A
 11%|█         | 1/9 [01:03<08:29, 63.66s/it][A
 22%|██▏       | 2/9 [02:09<07:30, 64.30s/it][A
 33%|███▎      | 3/9 [04:24<08:32, 85.40s/it][A
 44%|████▍     | 4/9 [05:19<06:22, 76.47s/it][A
 56%|█████▌    | 5/9 [06:19<04:46, 71.60s/it][A
 67%|██████▋   | 6/9 [07:25<03:29, 69.83s/it][A
100%|██████████| 9/9 [07:25<00:00, 49.52s/it][A

In [137]:
# Derive the DataFrame column names by running the same feature builders on a
# sample hitter/pitcher pair, in the same order the row-building cell
# concatenates their values.
sample_hitter = get_player_id_from_name("Kevin Pillar")
sample_pitcher = get_player_id_from_name("Jacob DeGrom")
player_stats_columns = [
    *get_current_season_stats("Kevin Pillar").keys(),
    *batting_past_N_games(7, sample_hitter).keys(),
    *batting_past_N_games(15, sample_hitter).keys(),
    *pitching_past_N_games(5, sample_pitcher).keys(),
    *get_h2h_vs_pitcher(sample_hitter, sample_pitcher).keys(),
    'pitcher_hitter_opposite_hand',
    'player_got_hit',
]

In [138]:
# Assemble the per-player feature rows into a labelled table and display it.
player_stats_table = pd.DataFrame(rows_list, columns=player_stats_columns)
player_stats_table

Unnamed: 0,Name,ID,Team,gamesPlayed,groundOuts,runs,doubles,triples,homeRuns,strikeOuts,...,whip_p5G,winPercentage_p5G,atBats_h2h,avg_h2h,hits_h2h,obp_h2h,ops_h2h,slg_h2h,pitcher_hitter_opposite_hand,player_got_hit
0,Adeiny Hechavarria,588751,121,51.0,39.0,19.0,6.0,0.0,5.0,22.0,...,1.50,0.00,0.0,0.000,0.0,0.000,0.000,0.000,1.0,False
1,Amed Rosario,642708,121,99.0,100.0,47.0,20.0,5.0,11.0,85.0,...,1.50,0.00,4.0,0.250,1.0,0.400,1.150,0.750,1.0,False
2,Dominic Smith,642086,121,87.0,39.0,34.0,10.0,0.0,10.0,44.0,...,1.50,0.00,0.0,0.000,0.0,0.000,0.000,0.000,0.0,False
3,J.D. Davis,605204,121,86.0,68.0,31.0,11.0,1.0,9.0,52.0,...,1.50,0.00,1.0,1.000,1.0,1.000,3.000,2.000,1.0,True
4,Jeff McNeil,643446,121,88.0,81.0,50.0,27.0,1.0,9.0,48.0,...,1.50,0.00,0.0,0.000,0.0,0.000,0.000,0.000,0.0,False
5,Juan Lagares,501571,121,84.0,45.0,19.0,6.0,0.0,2.0,50.0,...,1.50,0.00,1.0,0.000,0.0,0.000,0.000,0.000,1.0,True
6,Michael Conforto,624424,121,93.0,62.0,52.0,18.0,0.0,18.0,93.0,...,1.50,0.00,4.0,1.000,4.0,1.000,2.750,1.750,0.0,True
7,Pete Alonso,624413,121,101.0,85.0,64.0,21.0,2.0,33.0,108.0,...,1.50,0.00,1.0,1.000,1.0,1.000,2.000,1.000,1.0,True
8,Robinson Cano,429664,121,77.0,94.0,29.0,19.0,0.0,9.0,53.0,...,1.50,0.00,2.0,0.500,1.0,0.500,1.000,0.500,0.0,True
9,Todd Frazier,453943,121,78.0,60.0,35.0,8.0,2.0,13.0,68.0,...,1.50,0.00,1.0,1.000,1.0,1.000,3.000,2.000,1.0,True


In [139]:
# Stamp the file with `today`: the rows were built from statsapi.schedule(today),
# so naming the CSV after yesterday's date mislabelled the data.
player_stats_table.to_csv("player_stats_{}.csv".format(today.replace("/", "_")), index=False)

In [182]:
# Exploratory: request Buster Posey's career stat splits with sitCodes=[ven].
# NOTE(review): the saved output below shows this request failing with
# "ValueError: Request failed. Status Code: 500." -- fix the hydrate string or
# drop the cell so the notebook survives Restart & Run All.
hydrate = 'stats(group=[hitting],type=[careerStatSplits], sitCodes=[ven])'
params = {'personId': get_player_id_from_name("Buster Posey"), 'hydrate':hydrate, 'sportId':1}
r = statsapi.get('person',params)
r

ValueError: Request failed. Status Code: 500.

In [107]:
# Example: Buster Posey's batting rate stats (plus hits) over the trailing 22 days.
batting_past_N_days(22, get_player_id_from_name("Buster Posey"))

{'avg_p22d': 0.367,
 'hits_p22d': 18.0,
 'obp_p22d': 0.426,
 'ops_p22d': 0.977,
 'slg_p22d': 0.551}

In [108]:
# Example: Buster Posey's head-to-head batting line against Clayton Kershaw.
# NOTE(review): the saved output below has no atBats_h2h entry although
# get_h2h_vs_pitcher keeps 'atBats' -- the output looks stale; re-run to refresh.
get_h2h_vs_pitcher(get_player_id_from_name("Buster Posey"), get_player_id_from_name("Clayton Kershaw"))

{'avg_h2h': 0.225,
 'hits_h2h': 25.0,
 'obp_h2h': 0.271,
 'ops_h2h': 0.604,
 'slg_h2h': 0.333}

In [45]:
# Example: Wade LeBlanc's pitching stats over the trailing 30 days.
# NOTE(review): the saved output below shows raw, unsuffixed stats (integers
# included), which pitching_past_N_days as defined above would filter and
# rename -- it appears to predate the current function; re-run to refresh.
pitching_past_N_days(30, get_player_id_from_name("Wade LeBlanc"))

{'airOuts': 26,
 'atBats': 103,
 'avg': '.252',
 'balks': 0,
 'baseOnBalls': 7,
 'battersFaced': 110,
 'blownSaves': 0,
 'catchersInterference': 0,
 'caughtStealing': 0,
 'completeGames': 0,
 'doubles': 2,
 'earnedRuns': 10,
 'era': '3.38',
 'gamesPitched': 5,
 'gamesPlayed': 5,
 'gamesStarted': 1,
 'groundIntoDoublePlay': 2,
 'groundOuts': 30,
 'groundOutsToAirouts': '0.71',
 'hitBatsmen': 0,
 'hits': 26,
 'hitsPer9Inn': '8.78',
 'holds': 0,
 'homeRuns': 6,
 'homeRunsPer9': '2.06',
 'inheritedRunners': 5,
 'inheritedRunnersScored': 4,
 'inningsPitched': '26.2',
 'intentionalWalks': 1,
 'losses': 1,
 'numberOfPitches': 432,
 'outs': 80,
 'pickoffs': 0,
 'pitchesPerInning': '16.20',
 'runs': 10,
 'runsScoredPer9': '3.44',
 'sacBunts': 0,
 'sacFlies': 0,
 'saveOpportunities': 0,
 'saves': 0,
 'shutouts': 0,
 'stolenBasePercentage': '1.000',
 'stolenBases': 2,
 'strikeOuts': 21,
 'strikePercentage': '.650',
 'strikeoutWalkRatio': '3.00',
 'strikeoutsPer9Inn': '7.09',
 'strikes': 281,
 'tr

In [58]:
# Example: does Lance Lynn throw right-handed?
check_pitcher_right_handed(get_player_id_from_name("Lance Lynn"))

True

In [69]:
# Example: does Bryce Harper bat right-handed?
check_batter_right_handed(get_player_id_from_name("Bryce Harper"))

False

In [72]:
# Example: do Bryce Harper (batter) and Jacob DeGrom (pitcher) differ in handedness?
check_pitcher_batter_opposite_hand(get_player_id_from_name("Bryce Harper"), get_player_id_from_name("Jacob DeGrom"))

True

In [110]:
# teams_dict is not assigned in any cell shown above, so build it here to keep
# this cell runnable on a fresh kernel.
teams_dict = generate_teams_dict()
statsapi.last_game(teams_dict['sf'])

566518

In [141]:
# Inspect the raw probablePitcher payload for the home team of game 566516.
statsapi.get('schedule', {'sportId': '1', 'gamePk': 566516, 'hydrate':'probablePitcher'})['dates'][0]['games'][0]['teams']['home']['probablePitcher']

{'fullName': 'Samardzija, Jeff', 'id': 502188, 'link': '/api/v1/people/502188'}

In [137]:
# Inspect the raw record(s) lookup_player returns for a name search.
statsapi.lookup_player("Buster Posey")

[{'boxscoreName': 'Posey',
  'currentTeam': {'id': 137},
  'firstLastName': 'Buster Posey',
  'firstName': 'Gerald',
  'fullFMLName': 'Gerald Dempsey Posey',
  'fullLFMName': 'Posey, Gerald Dempsey',
  'fullName': 'Buster Posey',
  'id': 457763,
  'initLastName': 'B Posey',
  'lastFirstName': 'Posey, Buster',
  'lastInitName': 'Posey, B',
  'lastName': 'Posey',
  'mlbDebutDate': '2009-09-11',
  'nameFirstLast': 'Buster Posey',
  'primaryNumber': '28',
  'primaryPosition': {'abbreviation': 'C', 'code': '2'},
  'useName': 'Buster'}]

In [144]:
# Example: probable pitcher opposing player 457763 in game 566516.
get_opposing_pitcher(457763, 566516)

'Flaherty, Jack'

In [151]:
# Inspect the raw schedule entries for 07/07/2019 (shows the per-game keys used above).
statsapi.schedule('07/07/2019')

[{'away_id': 110,
  'away_name': 'Baltimore Orioles',
  'away_pitcher_note': 'The Orioles changed their plans on Saturday, opting for Wojciechowski  in this start instead of Gabriel Ynoa. Wojciechowski has made one start since joining Baltimore, but posted a 3.61 ERA over 15 starts in Triple-A with Cleveland earlier this season.',
  'away_probable_pitcher': 'Wojciechowski, Asher',
  'away_score': 1,
  'current_inning': 9,
  'doubleheader': 'N',
  'game_date': '2019-07-07',
  'game_datetime': '2019-07-07T17:07:00Z',
  'game_id': 566911,
  'game_num': 1,
  'game_type': 'R',
  'home_id': 141,
  'home_name': 'Toronto Blue Jays',
  'home_pitcher_note': 'Thornton will take the mound in the team’s final game before the All-Star break. The 25-year-old right-hander is coming off the worst start of his career, in which he allowed seven runs on 11 hits over 2 2/3 innings against the Red Sox on Tuesday. He will be looking for a better outing against Baltimore.',
  'home_probable_pitcher': 'Thornto

In [169]:
# List the stat types the API reports; the list includes the lastXGames,
# byDateRange and vsPlayer types used in the hydrate strings above.
statsapi.meta("statTypes")

[{'displayName': 'pecota'},
 {'displayName': 'pecotaRos'},
 {'displayName': 'yearByYear'},
 {'displayName': 'yearByYearAdvanced'},
 {'displayName': 'season'},
 {'displayName': 'seasonAdvanced'},
 {'displayName': 'career'},
 {'displayName': 'careerStatSplits'},
 {'displayName': 'gameLog'},
 {'displayName': 'playLog'},
 {'displayName': 'pitchLog'},
 {'displayName': 'metricLog'},
 {'displayName': 'metricAverages'},
 {'displayName': 'pitchArsenal'},
 {'displayName': 'outsAboveAverage'},
 {'displayName': 'sprayChart'},
 {'displayName': 'vsPlayer'},
 {'displayName': 'vsPlayerTotal'},
 {'displayName': 'vsPlayer5Y'},
 {'displayName': 'vsTeam'},
 {'displayName': 'vsTeam5Y'},
 {'displayName': 'vsTeamTotal'},
 {'displayName': 'lastXGames'},
 {'displayName': 'byDateRange'},
 {'displayName': 'byMonth'},
 {'displayName': 'byDayOfWeek'},
 {'displayName': 'rankings'},
 {'displayName': 'rankingsByYear'},
 {'displayName': 'hotColdZones'},
 {'displayName': 'availableStats'},
 {'displayName': 'opponentsFa