In [18]:
import os
from pprint import pprint

import pandas as pd
from zbaseballdata.client import ZBaseballDataClient

# Read the API credentials from the environment so real secrets are never saved
# in the notebook. The "USERNAME"/"PASSWORD" placeholders remain as fallbacks
# when the variables are not set.
client = ZBaseballDataClient(
    os.environ.get("ZBASEBALLDATA_USERNAME", "USERNAME"),
    os.environ.get("ZBASEBALLDATA_PASSWORD", "PASSWORD"),
)

# Pull Hitting Statistics

#### The function definition for the Python Client. 
```python
def get_batting_stat_split(
    self,
    retro_id: str,
    stats: List[str],           # E.g. ["H", "SO", "OPS", "BB"]
    agg_by: str, 
    vs_pitcher: str = None,     # a pitcher's retro_id
    game_type: str = None,      # ALCS, POST, REG, WS, NLWC, etc... None is all games
    pitcher_throw: str = None,  # R/L (None for all)
    start_date: str = None,     # String of the form YYYY-MM-DD
    end_date: str = None,       # String of the form YYYY-MM-DD
    year: int = None,           # Or simply use this to filter by year. 
):
```

The return value is a dictionary keyed by stat name. Each value is itself a dictionary that maps a split label (e.g. `career`, a date, or a day of the week) to that stat's value for the split.

The return values can be passed directly into pd.DataFrame to return a nicely formatted pandas dataframe. 

Valid Stats to choose from:
 - PA
 - AB
 - H
 - 2B
 - 3B 
 - HR 
 - BB
 - IBB 
 - SO
 - ROE
 - GIDP
 - SH (sac hits)
 - SF 
 - G (games played)
 - HBP
 - RBI
 - R (runs scored)
 - SB
 - CS
 - GB (ground balls hit)
 - FB (fly balls hit)
 - PF (pop flies hit)
 - LD (line drives hit)
 - AVG
 - OBP
 - SLG
 - OPS

Valid `agg_by` arguments (how to aggregate data "row-wise"):
 - C (by career totals)
 - D (by day - Double headers aggregate to a single day)
 - G (by game / retrosheet unique game id)
 - W (by week)
 - M (by month)
 - MY (by month/year)
 - Y (by year)
 - DOW (by day of the week)
 - INN (by inning)
 - COP (by count on play)
 - PARK_ID (by retrosheet park ID, see /api/v1/parks/)
 - PARK_NAME (by human readable park name)
 - DAYNIGHT (split by day/night)
 - BATTINGORDER (by order in the lineup)
 - FIELD_POS (by position in the field)
 - BY_OPP_TEAM (by opposing team)
 - BY_BASES_OCCUPIED (by how the bases are filled when the event occurred)
 - VS_PITCHER_HAND (RHP / LHP)
 - HOME_AWAY (by home / away)

## Some Basic Examples Below

This is not an exhaustive list of examples, but it should illustrate the strengths of using the Python Client.

### Career Stats

In [44]:
# Look up Derek Jeter's retro_id by taking the first result of the player search.
jeter_matches = client.get_players(search="derek jeter")
jeter_id = next(jeter_matches)["retro_id"]

# Stats to request for each split.
requested_stats = ["H", "SO", "AB", "2B", "3B", "BB", "HR", "OPS"]

# agg_by="C" aggregates to career totals. With no game_type given this includes
# the postseason (but not spring training); pass game_type="REG" for regular
# season only.
client.get_batting_stat_split(retro_id=jeter_id, stats=requested_stats, agg_by="C")

{'H': {'career': 3665},
 'SO': {'career': 1975},
 'AB': {'career': 11845},
 '2B': {'career': 576},
 '3B': {'career': 71},
 'BB': {'career': 1148},
 'HR': {'career': 280},
 'OPS': {'career': 0.818}}

### Stats by day

In [11]:
# Reuses `jeter_id` and `requested_stats` from the Career Stats cell above rather
# than repeating the player search (an extra API call) and the stat list.
#
# The per-day result is quite large, so it is bound to a named variable instead
# of being displayed. Assigning to `_` would overwrite IPython's "last output"
# variable and stop it from being updated.
jeter_daily_splits = client.get_batting_stat_split(
    retro_id=jeter_id,
    stats=requested_stats,
    agg_by="D",
)

### Stats filtered by date range

In [25]:
# Stats requested in the date-filtered examples.
requested_stats = ["AVG", "SLG", "IBB"]

# Career-style aggregation ("C"), filtered with start_date/end_date.
date_window = {"start_date": "2014-05-01", "end_date": "2014-06-01"}
res = client.get_batting_stat_split(
    retro_id=jeter_id,
    stats=requested_stats,
    agg_by="C",
    **date_window,
)

# Print the raw nested dictionary first...
pprint(res)

# ...then show the same result as a pandas DataFrame.
pd.DataFrame(res)

{'AVG': {'career': 0.274}, 'IBB': {'career': 0}, 'SLG': {'career': 0.34}}


Unnamed: 0,AVG,SLG,IBB
career,0.274,0.34,0


### Date Range - Different "agg_by"

In [24]:
# Same start/end dates as the previous example, split by day of the week ("DOW").
dow_stats = ["H", "HR", "2B", "BB", "OPS"]
res = client.get_batting_stat_split(
    retro_id=jeter_id,
    stats=dow_stats,
    agg_by="DOW",
    start_date="2014-05-01",
    end_date="2014-06-01",
)
pd.DataFrame(res)

Unnamed: 0,H,HR,2B,BB,OPS
fri,3,0,0,1,0.378
mon,6,0,1,1,1.038
sat,4,0,1,0,0.693
sun,8,0,0,0,0.783
thu,1,0,0,1,0.258
tue,3,0,0,4,0.721
wed,4,1,0,0,0.688


In [17]:
# Per-day breakdown ("D") of the stats in `requested_stats`, using the same
# start/end dates as the earlier date-range examples.
daily_query = dict(
    retro_id=jeter_id,
    stats=requested_stats,
    agg_by="D",
    start_date="2014-05-01",
    end_date="2014-06-01",
)
res = client.get_batting_stat_split(**daily_query)
pd.DataFrame(res)

Unnamed: 0,AVG,SLG,IBB
2014-05-01,0.25,0.25,0
2014-05-02,0.0,0.0,0
2014-05-04,0.0,0.0,0
2014-05-05,0.5,0.75,0
2014-05-06,0.5,0.5,0
2014-05-07,0.4,1.0,0
2014-05-09,0.0,0.0,0
2014-05-11,0.2,0.2,0
2014-05-12,0.75,0.75,0
2014-05-13,0.0,0.0,0


### Stats vs. a Specific Pitcher, between specific dates

In [28]:
# Aaron Judge vs. J.A. Happ: all matchups up to end_date 2018-06-30
# (i.e. before July 2018), one row per day.
judge_id, happ_id = "judga001", "happj001"
res = client.get_batting_stat_split(
    retro_id=judge_id,
    stats=["H", "OPS", "BB", "SO"],
    agg_by="D",
    end_date="2018-06-30",
    vs_pitcher=happ_id,
)
pd.DataFrame(res)

Unnamed: 0,H,OPS,BB,SO
2016-08-17,1,0.666,0,2
2017-07-04,1,1.666,0,2
2017-08-08,0,0.667,2,1
2018-03-29,1,1.667,1,1


### Tex vs. LHP

In [32]:
# Mark Teixeira vs. left-handed pitching (pitcher_throw="L"), one row per year.
teixeira_id = "teixm001"
lhp_stats = ["H", "OPS", "BB", "SO"]
res = client.get_batting_stat_split(
    retro_id=teixeira_id,
    stats=lhp_stats,
    agg_by="Y",
    pitcher_throw="L",
)
pd.DataFrame(res)

Unnamed: 0,H,OPS,BB,SO
2003,51,0.923,17,38
2004,51,0.959,18,38
2005,50,0.858,17,33
2006,51,0.977,20,42
2007,55,1.0,21,33
2008,63,0.91,41,29
2009,59,0.882,29,33
2010,52,0.918,41,34
2011,58,0.971,22,25
2012,51,0.866,20,28


In [43]:
# Mark Teixeira's all-time regular-season (game_type="REG") stats vs.
# left-handed pitching, one row per park name.
teixeira_id = "teixm001"
park_query = dict(
    retro_id=teixeira_id,
    stats=["H", "OPS", "BB", "SO"],
    agg_by="PARK_NAME",
    pitcher_throw="L",
    game_type="REG",
)
res = client.get_batting_stat_split(**park_query)
pd.DataFrame(res)

Unnamed: 0,H,OPS,BB,SO
Angel Stadium of Anaheim,20,0.748,18,6
AT&T Park,0,0.0,0,2
Busch Stadium III,1,1.0,0,0
Chase Field,3,1.833,2,1
Citi Field,3,0.6,3,2
Citizens Bank Park,6,0.786,2,8
Comerica Park,17,0.961,8,13
Coors Field,1,0.708,2,1
Dodger Stadium,0,0.0,0,1
Fenway Park,17,0.972,16,12
