In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import statsapi
import json

import re

%matplotlib inline

In [3]:
team_params = {'activeStatus':'Y', 'season':2019, 'sportIds':1, 'fields':'teams,id,name,teamCode,fileCode,teamName,locationName,shortName'}
teams_info = statsapi.get('teams', team_params)

teams_dict = {}
for team in teams_info['teams']:
    teams_dict[team['fileCode']] = team['id']
teams_dict

{'ana': 108,
 'ari': 109,
 'atl': 144,
 'bal': 110,
 'bos': 111,
 'chc': 112,
 'cin': 113,
 'cle': 114,
 'col': 115,
 'cws': 145,
 'det': 116,
 'hou': 117,
 'kc': 118,
 'la': 119,
 'mia': 146,
 'mil': 158,
 'min': 142,
 'nym': 121,
 'nyy': 147,
 'oak': 133,
 'phi': 143,
 'pit': 134,
 'sd': 135,
 'sea': 136,
 'sf': 137,
 'stl': 138,
 'tb': 139,
 'tex': 140,
 'tor': 141,
 'was': 120}

In [4]:
print(statsapi.roster(teams_dict['stl']))

#50  P   Adam Wainwright
#7   C   Andrew Knizner
#21  P   Andrew Miller
#18  P   Carlos Martinez
#40  P   Chasen Shreve
#43  P   Dakota Hudson
#62  P   Daniel Ponce de Leon
#25  RF  Dexter Fowler
#55  P   Dominic Leone
#63  SS  Edmundo Sosa
#65  P   Giovanny Gallegos
#48  CF  Harrison Bader
#22  P   Jack Flaherty
#60  P   John Brebbia
#53  P   John Gant
#38  RF  Jose Martinez
#16  2B  Kolten Wong
#32  C   Matt Wieters
#52  P   Michael Wacha
#39  P   Miles Mikolas
#12  SS  Paul DeJong
#46  1B  Paul Goldschmidt
#19  SS  Tommy Edman
#41  LF  Tyler O'Neill
#34  2B  Yairo Munoz



In [6]:
statsapi.lookup_player("Chasen Shreve")

[{'boxscoreName': 'Shreve',
  'currentTeam': {'id': 138},
  'firstLastName': 'Chasen Shreve',
  'firstName': 'Chasen',
  'fullFMLName': 'Chasen Dean Shreve',
  'fullLFMName': 'Shreve, Chasen Dean',
  'fullName': 'Chasen Shreve',
  'id': 592741,
  'initLastName': 'C Shreve',
  'lastFirstName': 'Shreve, Chasen',
  'lastInitName': 'Shreve, C',
  'lastName': 'Shreve',
  'mlbDebutDate': '2014-07-19',
  'nameFirstLast': 'Chasen Shreve',
  'nickName': 'Sha-Reef',
  'primaryNumber': '40',
  'primaryPosition': {'abbreviation': 'P', 'code': '1'},
  'useName': 'Chasen'}]

In [37]:
def get_player_list(team_code):
    team_id = teams_dict[team_code]
    player_names = []
    roster = statsapi.roster(team_id)
    roster_list = roster.split("\n")[:-1]
    for player in roster_list:
        player_names.append(" ".join(player.split()[2:]))
    return player_names

get_player_list("sf")

['Alex Dickerson',
 'Andrew Suarez',
 'Austin Slater',
 'Brandon Belt',
 'Brandon Crawford',
 'Buster Posey',
 'Derek Holland',
 'Donovan Solano',
 'Drew Pomeranz',
 'Jeff Samardzija',
 'Joe Panik',
 'Kevin Pillar',
 'Madison Bumgarner',
 'Mark Melancon',
 'Mike Yastrzemski',
 'Pablo Sandoval',
 'Reyes Moronta',
 'Sam Dyson',
 'Shaun Anderson',
 'Stephen Vogt',
 'Tony Watson',
 'Trevor Gott',
 'Tyler Austin',
 'Tyler Beede',
 'Will Smith']

In [38]:
statsapi.lookup_player("Trevor Gott")

[{'boxscoreName': 'Gott',
  'currentTeam': {'id': 137},
  'firstLastName': 'Trevor Gott',
  'firstName': 'Trevor',
  'fullFMLName': 'Trevor Vaughan Gott',
  'fullLFMName': 'Gott, Trevor Vaughan',
  'fullName': 'Trevor Gott',
  'id': 641627,
  'initLastName': 'T Gott',
  'lastFirstName': 'Gott, Trevor',
  'lastInitName': 'Gott, T',
  'lastName': 'Gott',
  'mlbDebutDate': '2015-06-14',
  'nameFirstLast': 'Trevor Gott',
  'primaryNumber': '58',
  'primaryPosition': {'abbreviation': 'P', 'code': '1'},
  'useName': 'Trevor'}]

In [43]:
def get_player_id_from_name(player_name):
    try:
        return statsapi.lookup_player(player_name)[0]['id']
    except IndexError:
        return False

def check_pos_player(player_name):
    try:
        return statsapi.lookup_player(player_name)[0]['primaryPosition']['abbreviation'] != "P"
    except IndexError:
        return False

def get_current_season_stats(player_name):
    print(player_name)
    if not check_pos_player(player_name):
        raise ValueError("Player name entered is not a position player")
        
    stats_dict = {}
    player_id = get_player_id_from_name(player_name)
    
    # Look up the player's current season hitting stats
    get_player_stats = statsapi.player_stats(player_id, 'hitting') 
    
    # Get the stats for the most recent season
    curr_season_stats = get_player_stats.split("Season Hitting")[-1]
    
    #Break up the stats into a list
    stats_list = curr_season_stats.split("\n")[1:-2]
#     # To account for if the position player has pitched before, we limit 
#     # their stats list to just batting stats
#     if len(stats_list) > 28:
#         stats_list = stats_list[:28]
    for stat in stats_list:
        stat_name = re.search("[A-Za-z]+", stat).group()
        stat_val = re.search("[^:A-Za-z]+", stat).group()
        stats_dict[stat_name] = float(stat_val)
    return stats_dict

In [44]:
print(re.search("(?s)(gamesPlayed).*?(groundOutsToAirouts:\s[0-9].[0-9]{2})",
    statsapi.player_stats(get_player_id_from_name("Alex Dickerson"), 'hitting')).group().split("Season Hitting")[-1].group().split("\n"))

AttributeError: 'str' object has no attribute 'group'

In [45]:
print(statsapi.player_stats(get_player_id_from_name("Alex Dickerson"), 'hitting').split("Season Hitting")[-1].split("\n")[1:-2])

['gamesPlayed: 32', 'groundOuts: 14', 'runs: 17', 'doubles: 7', 'triples: 2', 'homeRuns: 4', 'strikeOuts: 19', 'baseOnBalls: 7', 'intentionalWalks: 1', 'hits: 28', 'hitByPitch: 1', 'avg: .350', 'atBats: 80', 'obp: .409', 'slg: .638', 'ops: 1.047', 'caughtStealing: 1', 'stolenBases: 1', 'stolenBasePercentage: .500', 'groundIntoDoublePlay: 1', 'numberOfPitches: 329', 'plateAppearances: 88', 'totalBases: 51', 'rbi: 21', 'leftOnBase: 33', 'sacBunts: 0', 'sacFlies: 0', 'babip: .421', 'groundOutsToAirouts: 0.74']


In [46]:
get_current_season_stats("Kevin Pillar")

Kevin Pillar


{'atBats': 360.0,
 'avg': 0.247,
 'babip': 0.258,
 'baseOnBalls': 11.0,
 'caughtStealing': 2.0,
 'doubles': 21.0,
 'gamesPlayed': 96.0,
 'groundIntoDoublePlay': 10.0,
 'groundOuts': 102.0,
 'groundOutsToAirouts': 0.86,
 'hitByPitch': 4.0,
 'hits': 89.0,
 'homeRuns': 12.0,
 'intentionalWalks': 3.0,
 'leftOnBase': 150.0,
 'numberOfPitches': 1234.0,
 'obp': 0.274,
 'ops': 0.686,
 'plateAppearances': 379.0,
 'rbi': 52.0,
 'runs': 50.0,
 'sacBunts': 0.0,
 'sacFlies': 4.0,
 'slg': 0.411,
 'stolenBasePercentage': 0.8,
 'stolenBases': 8.0,
 'strikeOuts': 54.0,
 'totalBases': 148.0,
 'triples': 1.0}

In [47]:
rows_list = []
for team in teams_dict:
    player_list = get_player_list(team)
    for player in player_list:
        try:
            player_stats_vals = list(get_current_season_stats(player).values())
            new_row = [player] + player_stats_vals
            rows_list.append(new_row)
        except ValueError:
            continue
        except IndexError: 
            continue
player_stats_table = pd.DataFrame(np.array(rows_list),
                                  columns=['name'] + list(get_current_season_stats("Kevin Pillar").keys()))
player_stats_table

Blake Treinen
Brett Anderson
Brian Schlitter
Chad Pinder
Chris Bassitt
Chris Herrmann
Daniel Mengden
Franklin Barreto
Homer Bailey
Joakim Soria
Josh Phegley
Jurickson Profar
Khris Davis
Liam Hendriks
Lou Trivino
Marcus Semien
Mark Canha
Matt Chapman
Matt Olson
Mike Fiers
Ramon Laureano
Robbie Grossman
Ryan Buchter
Wei-Chung Wang
Yusmeiro Petit
Adam Frazier
Bryan Reynolds
Chris Archer
Chris Stratton
Clay Holmes
Colin Moran
Corey Dickerson
Dario Agrazal
Elias Diaz
Felipe Vazquez
Francisco Liriano
Jacob Stallings
Joe Musgrove
Jordan Lyles
Jose Osuna
Josh Bell
Jung Ho Kang
Kevin Newman
Kyle Crick
Luis Escobar
Melky Cabrera
Michael Feliz
Richard Rodriguez
Starling Marte
Trevor Williams
Andres Munoz
Austin Hedges
Cal Quantrill
Chris Paddack
Craig Stammen
Dinelson Lamet
Eric Hosmer
Eric Lauer
Fernando Tatis Jr.
Francisco Mejia
Franmil Reyes
Gerardo Reyes
Greg Garcia
Hunter Renfroe
Ian Kinsler
Joey Lucchesi
Josh Naylor
Kirby Yates
Logan Allen
Luis Perdomo
Manny Machado
Manuel Margot
Matt Strah

Unnamed: 0,name,gamesPlayed,groundOuts,runs,doubles,triples,homeRuns,strikeOuts,baseOnBalls,intentionalWalks,...,groundIntoDoublePlay,numberOfPitches,plateAppearances,totalBases,rbi,leftOnBase,sacBunts,sacFlies,babip,groundOutsToAirouts
0,Chad Pinder,69.0,65.0,30.0,12.0,0.0,8.0,47.0,9.0,0.0,...,7.0,778.0,219.0,86.0,28.0,98.0,0.0,2.0,0.278,1.48
1,Franklin Barreto,10.0,11.0,5.0,1.0,0.0,2.0,13.0,1.0,0.0,...,0.0,130.0,34.0,12.0,4.0,17.0,0.0,0.0,0.167,2.75
2,Jurickson Profar,83.0,86.0,38.0,14.0,1.0,13.0,52.0,21.0,2.0,...,6.0,1264.0,328.0,120.0,45.0,131.0,0.0,1.0,0.22,0.88
3,Marcus Semien,97.0,111.0,66.0,24.0,3.0,14.0,59.0,48.0,1.0,...,9.0,1796.0,447.0,180.0,49.0,110.0,0.0,1.0,0.288,0.92
4,Mark Canha,63.0,41.0,40.0,8.0,1.0,15.0,44.0,34.0,0.0,...,5.0,953.0,223.0,101.0,26.0,71.0,0.0,2.0,0.25,0.79
5,Matt Chapman,94.0,89.0,63.0,25.0,3.0,22.0,74.0,44.0,0.0,...,6.0,1699.0,405.0,196.0,59.0,114.0,0.0,2.0,0.295,0.94
6,Ramon Laureano,96.0,66.0,60.0,21.0,0.0,19.0,99.0,19.0,0.0,...,6.0,1500.0,377.0,172.0,52.0,148.0,0.0,6.0,0.325,0.73
7,Robbie Grossman,79.0,60.0,35.0,16.0,1.0,5.0,45.0,38.0,1.0,...,5.0,1097.0,285.0,97.0,28.0,79.0,0.0,1.0,0.299,0.77
8,Adam Frazier,87.0,97.0,49.0,21.0,4.0,4.0,42.0,20.0,4.0,...,1.0,1333.0,356.0,133.0,26.0,117.0,2.0,1.0,0.312,0.98
9,Bryan Reynolds,73.0,61.0,40.0,19.0,2.0,7.0,59.0,27.0,0.0,...,4.0,1092.0,276.0,127.0,37.0,94.0,0.0,1.0,0.425,1.45


In [48]:
check_pos_player("Dylan Moore")

True

In [277]:
statsapi.lookup_player()

[]

In [None]:
print(re.search("(?s)(gamesPlayed).*?(groundOutsToAirouts:\s[0-9].[0-9]{2})",
                statsapi.player_stats(get_player_id_from_name(592192)).split("Season Hitting")[-1]).group())

In [269]:
re.search("(?s)(gamesPlayed).*?(?=Season Fielding)",
                statsapi.player_stats(get_player_id_from_name("Dylan Moore")).split("Season Hitting")[-1]).group().split("\n")[:-2][:28]

['gamesPlayed: 63',
 'groundOuts: 28',
 'runs: 16',
 'doubles: 7',
 'triples: 0',
 'homeRuns: 4',
 'strikeOuts: 49',
 'baseOnBalls: 14',
 'intentionalWalks: 0',
 'hits: 26',
 'hitByPitch: 6',
 'avg: .195',
 'atBats: 133',
 'obp: .301',
 'slg: .338',
 'ops: .639',
 'caughtStealing: 7',
 'stolenBases: 7',
 'groundIntoDoublePlay: 4',
 'numberOfPitches: 657',
 'plateAppearances: 154',
 'totalBases: 45',
 'rbi: 11',
 'leftOnBase: 55',
 'sacBunts: 1',
 'sacFlies: 0',
 'babip: .275',
 'groundOutsToAirouts: 0.90']

In [25]:
print(re.search("(?s)(gamesPlayed).*?(groundOutsToAirouts:\s[0-9].[0-9]{2})",
                statsapi.player_stats(get_player_id_from_name("Dylan Moore"), 'hitting')).group().split("\n"))

['gamesPlayed: 64', 'groundOuts: 29', 'runs: 17', 'doubles: 7', 'triples: 1', 'homeRuns: 4', 'strikeOuts: 50', 'baseOnBalls: 14', 'intentionalWalks: 0', 'hits: 27', 'hitByPitch: 6', 'avg: .197', 'atBats: 137', 'obp: .299', 'slg: .350', 'ops: .650', 'caughtStealing: 7', 'stolenBases: 7', 'stolenBasePercentage: .500', 'groundIntoDoublePlay: 5', 'numberOfPitches: 668', 'plateAppearances: 158', 'totalBases: 48', 'rbi: 12', 'leftOnBase: 58', 'sacBunts: 1', 'sacFlies: 0', 'babip: .277', 'groundOutsToAirouts: 0.91']


In [52]:
statsapi.last_game(teams_dict['sf'])

566517

In [56]:
statsapi.meta("statTypes")

[{'displayName': 'pecota'},
 {'displayName': 'pecotaRos'},
 {'displayName': 'yearByYear'},
 {'displayName': 'yearByYearAdvanced'},
 {'displayName': 'season'},
 {'displayName': 'seasonAdvanced'},
 {'displayName': 'career'},
 {'displayName': 'careerStatSplits'},
 {'displayName': 'gameLog'},
 {'displayName': 'playLog'},
 {'displayName': 'pitchLog'},
 {'displayName': 'metricLog'},
 {'displayName': 'metricAverages'},
 {'displayName': 'pitchArsenal'},
 {'displayName': 'outsAboveAverage'},
 {'displayName': 'sprayChart'},
 {'displayName': 'vsPlayer'},
 {'displayName': 'vsPlayerTotal'},
 {'displayName': 'vsPlayer5Y'},
 {'displayName': 'vsTeam'},
 {'displayName': 'vsTeam5Y'},
 {'displayName': 'vsTeamTotal'},
 {'displayName': 'lastXGames'},
 {'displayName': 'byDateRange'},
 {'displayName': 'byMonth'},
 {'displayName': 'byDayOfWeek'},
 {'displayName': 'rankings'},
 {'displayName': 'rankingsByYear'},
 {'displayName': 'hotColdZones'},
 {'displayName': 'availableStats'},
 {'displayName': 'opponentsFa

In [57]:
get_player_id_from_name("Mookie Betts")

605141

In [89]:
import datetime
curr_time = datetime.datetime.today()
past_time = curr_time - datetime.timedelta(days=30)
print(curr_time.strftime("%m/%d/%Y"))
print(past_time.strftime("%m/%d/%Y"))

07/18/2019
06/18/2019


In [90]:
datetime.datetime.today().strftime("%m/%d/%Y")

'07/18/2019'

In [112]:
def batting_past_N_days(N, player_id, end_date=datetime.datetime.today()):
    
    start_date = (end_date - datetime.timedelta(days=N)).strftime("%m/%d/%Y")
    end_date = end_date.strftime("%m/%d/%Y")
    hydrate = 'stats(group=[hitting],type=[byDateRange],startDate={},endDate={}),currentTeam'.format(start_date, end_date)
    
    params = {'personId':personId,'hydrate':hydrate}
    r = statsapi.get('person',params)
    
    return r['people'][0]['stats'][0]['splits'][0]['stat']

In [106]:
personId = get_player_id_from_name("Brandon Crawford") # Bryce Harper
startDate = '06/01/2019'
endDate = '06/30/2019'
hydrate = 'stats(group=[hitting,pitching,fielding],type=[byDateRange],startDate={},endDate={}),currentTeam'.format(startDate,endDate)

In [107]:
# The rest is the same as statsapi.player_stats() to produce the output for printing...
# The only differences are the hydrate param being a variable from above
# and I added a line to the output to include the date range.
# Resulting endpoint URL: https://statsapi.mlb.com/api/v1/people/547180?hydrate=stats(group=[hitting,pitching,fielding],type=[byDateRange],startDate=06/01/2019,endDate=06/30/2019),currentTeam

params = {'personId':personId,'hydrate':hydrate, 'sportId':1}
r = statsapi.get('person',params)

In [111]:
r['people'][0]['stats'][0]['splits'][0]['stat']

{'atBats': 82,
 'avg': '.244',
 'baseOnBalls': 6,
 'catchersInterference': 0,
 'caughtStealing': 0,
 'doubles': 9,
 'gamesPlayed': 23,
 'groundIntoDoublePlay': 3,
 'groundIntoTriplePlay': 0,
 'groundOuts': 25,
 'hitByPitch': 0,
 'hits': 20,
 'homeRuns': 2,
 'intentionalWalks': 1,
 'leftOnBase': 32,
 'obp': '.292',
 'ops': '.719',
 'plateAppearances': 89,
 'rbi': 11,
 'runs': 7,
 'sacBunts': 0,
 'sacFlies': 1,
 'slg': '.427',
 'stolenBasePercentage': '1.000',
 'stolenBases': 1,
 'strikeOuts': 19,
 'totalBases': 35,
 'triples': 0}

In [118]:
batting_past_N_days(22, get_player_id_from_name("Elias Diaz"))

{'atBats': 47,
 'avg': '.362',
 'baseOnBalls': 6,
 'catchersInterference': 0,
 'caughtStealing': 1,
 'doubles': 5,
 'gamesPlayed': 14,
 'groundIntoDoublePlay': 2,
 'groundIntoTriplePlay': 0,
 'groundOuts': 10,
 'hitByPitch': 0,
 'hits': 17,
 'homeRuns': 4,
 'intentionalWalks': 0,
 'leftOnBase': 16,
 'obp': '.434',
 'ops': '1.157',
 'plateAppearances': 53,
 'rbi': 13,
 'runs': 12,
 'sacBunts': 0,
 'sacFlies': 0,
 'slg': '.723',
 'stolenBasePercentage': '.000',
 'stolenBases': 0,
 'strikeOuts': 13,
 'totalBases': 34,
 'triples': 0}

In [148]:
def get_h2h_vs_pitcher(batter_id, opponent_id):
    
    hydrate = 'stats(group=[hitting],type=[vsPlayer],opposingPlayerId={},season=2019,sportId=1)'.format(opponent_id)
    params = {'personId': batter_id, 'hydrate':hydrate, 'sportId':1}
    r = statsapi.get('person',params)
    
    return r['people'][0]['stats'][1]['splits'][0]['stat']

In [147]:
hydrate = 'stats(group=[hitting],type=[vsPlayer],opposingPlayerId={},season=2019,sportId=1)'.format(get_player_id_from_name("Jacob DeGrom"))
params = {'personId':get_player_id_from_name("Bryce Harper"), 'hydrate':hydrate, 'sportId':1}
r = statsapi.get('person', params)
r['people'][0]['stats'][1]['splits'][0]['stat']

{'atBats': 34,
 'avg': '.294',
 'baseOnBalls': 7,
 'caughtStealing': 0,
 'doubles': 2,
 'groundIntoDoublePlay': 0,
 'groundOuts': 7,
 'hitByPitch': 0,
 'hits': 10,
 'homeRuns': 2,
 'intentionalWalks': 2,
 'numberOfPitches': 164,
 'obp': '.415',
 'ops': '.944',
 'plateAppearances': 41,
 'rbi': 2,
 'runs': 6,
 'sacBunts': 0,
 'sacFlies': 0,
 'slg': '.529',
 'stolenBasePercentage': '.---',
 'stolenBases': 0,
 'strikeOuts': 12,
 'totalBases': 18,
 'triples': 0}

In [150]:
get_h2h_vs_pitcher(get_player_id_from_name("Bryce Harper"), get_player_id_from_name("Noah Syndergaard"))

{'atBats': 29,
 'avg': '.276',
 'baseOnBalls': 5,
 'caughtStealing': 0,
 'doubles': 1,
 'groundIntoDoublePlay': 2,
 'groundOuts': 10,
 'hitByPitch': 1,
 'hits': 8,
 'homeRuns': 0,
 'intentionalWalks': 0,
 'numberOfPitches': 146,
 'obp': '.389',
 'ops': '.699',
 'plateAppearances': 36,
 'rbi': 4,
 'runs': 4,
 'sacBunts': 0,
 'sacFlies': 1,
 'slg': '.310',
 'stolenBasePercentage': '1.000',
 'stolenBases': 4,
 'strikeOuts': 8,
 'totalBases': 9,
 'triples': 0}