In [2]:
# initialize
from datetime import datetime
import pandas as pd
from pybaseball import (
    statcast,
    cache,
    playerid_lookup,
    statcast_pitcher,
    statcast_batter,
    statcast_batter_expected_stats,
    statcast_pitcher_expected_stats,
    batting_stats,
    pitching_stats,
    team_pitching,
    team_batting,
    team_fielding,
    standings,
    playerid_reverse_lookup,
    playerid_reverse_lookup,
)

cache.enable()
start_dt = "2024-03-28"
end_dt = datetime.now().date().strftime("%Y-%m-%d")

In [56]:
"""
  Batting
Runs Scored (R)1
Total Bases (TB)1
Runs Batted In (RBI)1
Walks (BB)1
Strikeouts (K)-1
Stolen Bases (SB)1
Caught Stealing (CS)-1
Grand Slam Home Runs (GSHR)5
Pitching
Innings Pitched (IP)3
Hits Allowed (H)-1
Earned Runs (ER)-2
Walks Issued (BB)-1
Strikeouts (K)1
Shutouts (SO)1
Wins (W)2
Losses (L)-2
Saves (SV)5
Blown Saves (BS)-1
Holds (HD)2
Pitcher Team Win (PTW)1
  """

SCORING_RULES = {
    "batting": {
        "R": 1,
        "TB": 1,
        "RBI": 1,
        "BB": 1,
        "K": -1,
        "SB": 1,
        "CS": -1,
        "GSHR": 5,
    },
    "pitching": {
        "IP": 3,
        "H": -1,
        "ER": -2,
        "BB": -1,
        "K": 1,
        "SO": 1,
        "W": 2,
        "L": -2,
        "SV": 5,
        "BS": -1,
        "HD": 2,
        "PTW": 1,
    },
}
batting_scoring_df = pd.DataFrame(
    SCORING_RULES["batting"].items(), columns=["Stat", "Points"]
)
pitching_scoring_df = pd.DataFrame(
    SCORING_RULES["pitching"].items(), columns=["Stat", "Points"]
)
print(batting_scoring_df)
print(pitching_scoring_df)

   Stat  Points
0     R       1
1    TB       1
2   RBI       1
3    BB       1
4     K      -1
5    SB       1
6    CS      -1
7  GSHR       5
   Stat  Points
0    IP       3
1     H      -1
2    ER      -2
3    BB      -1
4     K       1
5    SO       1
6     W       2
7     L      -2
8    SV       5
9    BS      -1
10   HD       2
11  PTW       1


In [95]:
batters = [
    "Logan O'Hoppe",
    "Justin Turner",
    "Ketel Marte",
    "Max Muncy",
    "Brice Turang",
    "Juan Soto",
    "Steven Kwan",
    "Michael Conforto",
    "Aaron Judge",
    "Tyler O'Neill",
    # "Luis Robert Jr" # gotta figure out the suffix
]
pitchers = [
    "Yoshinobu Yamamoto",
    "Josh Hader",
    "Sonny Gray",
    "Kenley Jansen",
    "Cristian Javier",
    "Hunter Greene",
    "Carlos Rodon",
    "Aaron Nola",
    "Ronel Blanco",
    "Nick Pivetta",
]


In [97]:
batter_ids = pd.concat(
    [
        playerid_lookup(split_name[1], split_name[0])
        for split_name in [name.split(" ") for name in batters]
    ]
)
pitcher_ids = pd.concat(
    [
        playerid_lookup(split_name[1], split_name[0])
        for split_name in [name.split(" ") for name in pitchers]
    ]
)
roster_ids = pd.concat([batter_ids, pitcher_ids])

In [96]:
def getRosterStats(
    player_ids: pd.DataFrame, statcast_func, start_dt=start_dt, end_dt=end_dt
):
    key_mlbams = player_ids[["key_mlbam"]].values.tolist()
    player_stats = [
        statcast_func(start_dt, end_dt, player_id=key_mlbam[0]).assign(player_id=key_mlbam[0])

        for key_mlbam in key_mlbams
        if key_mlbam[0] is not None
    ]
    df = pd.concat(player_stats)
    df["game_date"] = pd.to_datetime(df["game_date"])
    return df

In [98]:
batter_stats = getRosterStats(roster_ids, statcast_batter)
pitcher_stats = getRosterStats(roster_ids, statcast_pitcher)

Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data


  df = pd.concat(player_stats)


Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data


  df = pd.concat(player_stats)


In [77]:
average_velocity = (
    pitcher_stats[["player_name", "release_speed", "pitch_name"]]
    .groupby(["player_name", "pitch_name"])
    .agg(["mean"])
    .sort_values(by=("release_speed", "mean"), ascending=False)
)

average_velocity.style.set_caption("Average Pitch Velocity by Pitcher").format(
    precision=3
)

Unnamed: 0_level_0,Unnamed: 1_level_0,release_speed
Unnamed: 0_level_1,Unnamed: 1_level_1,mean
player_name,pitch_name,Unnamed: 2_level_2
"Greene, Hunter",4-Seam Fastball,98.521
"Hader, Josh",Sinker,95.638
"Yamamoto, Yoshinobu",4-Seam Fastball,95.343
"Jansen, Kenley",Sinker,94.567
"Pivetta, Nick",4-Seam Fastball,94.093
"Blanco, Ronel",4-Seam Fastball,93.504
"Gray, Sonny",Sinker,93.094
"Gray, Sonny",4-Seam Fastball,92.703
"Javier, Cristian",4-Seam Fastball,92.088
"Jansen, Kenley",Cutter,91.816


In [78]:
average_velocity.to_html("exports/average_velocity.html")

In [73]:
batter_stats.to_csv("exports/batter_stats.csv", index=False)
pitcher_stats.to_csv("exports/pitcher_stats.csv", index=False)

In [99]:
fg_batter_ids = batter_ids["key_fangraphs"].values.tolist()
fg_pitcher_ids = pitcher_ids["key_fangraphs"].values.tolist()
print(fg_batter_ids)
print(fg_pitcher_ids)
batting_stats_fg = (
    batting_stats(2024).set_index("IDfg").filter(items=fg_batter_ids, axis=0)
)
pitching_stats_fg = (
    pitching_stats(2024).set_index("IDfg").filter(items=fg_pitcher_ids, axis=0)
)

[24729, 5235, 13613, 13301, 22186, 20123, 24610, 16376, 15640, 15711]
[-1, 14212, 12768, 3096, 17606, 22182, 16149, 19407, 15454]


In [100]:
batter_dashboard = batting_stats_fg[
    [
        "Name",
        "PA",
        "HR",
        "R",
        "RBI",
        "SB",
        "BB%",
        "K%",
        "ISO",
        "BABIP",
        "AVG",
        "OBP",
        "SLG",
        "OPS",
        "wOBA",
        "wRC+",
        "BsR",
        "WAR",
        "Off",
        "Def",
    ]
]
batter_dashboard.head(10).sort_values(by="OPS", ascending=False).to_html(
    "exports/batter_dashboard.html"
)

In [101]:
gshr=batter_stats.set_index('player_id').filter(items=['events', 'des'], axis=1).where(lambda x: x['events'] == 'home_run' ).where(lambda x: x['des'].str.contains('grand slam')).dropna().groupby('player_id').count()
gshr['GSHR']=gshr['events']
gshr=gshr.drop(columns=['events', 'des'])
newdf=batter_ids.join(batting_stats_fg, on="key_fangraphs").join(gshr, on='key_mlbam')

In [125]:
fantasy_df = newdf.set_index("Name")
fantasy_df["TB"] = round(fantasy_df["SLG"] * fantasy_df["AB"]).astype(int)
fantasy_df["K"] = fantasy_df["SO"]
fantasy_df["GSHR"] = fantasy_df["GSHR"].fillna(0).astype(int)
fantasy_dashboard = fantasy_df[["R" , "TB" , "RBI" , "BB" , "K" , "SB" , "CS", 'GSHR']].multiply(SCORING_RULES['batting'])
fantasy_dashboard["Total"] = fantasy_dashboard.sum(axis=1)
fantasy_dashboard.sort_values(by="Total", ascending=False)

Unnamed: 0_level_0,R,TB,RBI,BB,K,SB,CS,GSHR,Total
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Juan Soto,11,33,15,16,-10,1,-1,0,65
Tyler O'Neill,14,36,8,11,-13,1,0,0,57
Ketel Marte,16,32,8,8,-11,1,0,0,54
Michael Conforto,10,35,14,5,-14,0,-1,5,54
Steven Kwan,16,34,5,2,-11,1,0,0,47
Justin Turner,7,27,8,9,-9,0,-1,0,41
Brice Turang,8,24,7,3,-9,8,0,0,41
Aaron Judge,7,26,9,16,-17,0,0,0,41
Max Muncy,12,33,13,9,-27,0,0,0,40
Logan O'Hoppe,5,23,6,5,-9,0,0,5,35


In [121]:
pitching_stats_fg.to_csv("exports/pitching_stats_fg.csv", index=False)
batting_stats_fg.to_csv("exports/batting_stats_fg.csv", index=False)
fantasy_dashboard.style.set_caption("Fantasy Dashboard").bar(subset=["Total"], color="lightblue").highlight_max(color='yellow').highlight_min(color='pink').to_html("exports/fantasy_dashboard.html")


In [68]:
start_dt = "2024-03-28"
end_dt = datetime.now().date().strftime("%Y-%m-%d")
statcast(
    start_dt,
    end_dt,
).to_csv(f"exports/statcast_{start_dt}-{end_dt}.csv", index=False)

This is a large query, it may take a moment to complete


100%|██████████| 20/20 [00:00<00:00, 28.04it/s]
  final_data = pd.concat(dataframe_list, axis=0).convert_dtypes(convert_string=False)
