# Dodgers Data Bot
> This notebook is a sketchpad for data collected in this project. Nothing to see here! 

---

In [125]:
import os
import requests
import time
import pandas as pd
import jupyter_black
import altair as alt
from IPython.display import Image
from tqdm.notebook import tqdm
from bs4 import BeautifulSoup
from io import StringIO

In [126]:
# Notebook setup: turn on jupyter_black for this session.
jupyter_black.load()

# Loosen pandas display limits so wide/long frames and full cell text show.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Turn off Altair's max-rows check so larger frames can be charted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [127]:
# AWS profile name taken from the environment; None when the variable is unset.
profile_name = os.getenv("AWS_PERSONAL_PROFILE")

---

In [None]:
def fetch_expected_stats(year=2025, timeout=30):
    """Download Baseball Savant's expected-statistics leaderboard as a DataFrame.

    Requests the CSV export of the batter leaderboard (``type=batter``,
    ``min=q``, ``csv=true``) for one season and parses the response body
    with ``pandas.read_csv``.

    Parameters
    ----------
    year : int, default 2025
        Season to request; interpolated into the ``year`` query parameter.
    timeout : float or tuple, default 30
        Seconds to wait for the server, passed straight to ``requests.get``.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    pandas.DataFrame
        The CSV response parsed by ``pandas.read_csv``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    url = (
        "https://baseballsavant.mlb.com/leaderboard/expected_statistics?"
        f"type=batter&year={year}&position=&team=&filterType=bip&min=q&csv=true"
    )
    # Browser-like User-Agent sent along with the request.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/132.0.0.0 Safari/537.36"
        )
    }

    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # raise exception for HTTP errors

    return pd.read_csv(StringIO(response.text))


# Example usage: fetch the leaderboard for the default season
df = fetch_expected_stats()

# Keep only rows with at least 50 plate appearances ("pa" column)
qualified = df.loc[df["pa"].ge(50)]

# League-average xwOBA: mean of the "est_woba" column over that subset
lg_avg_xwoba = qualified["est_woba"].mean()

In [None]:
# Current Dodgers xwOBA data, read from the project's published JSON file
xwoba_url = "https://stilesdata.com/dodgers/data/batting/dodgers_xwoba_current.json"

# Load the frame and append "rn_fwd": within each player, the rank of "rn" in
# descending order (ties broken by row order), cast to int so the largest
# "rn" for a player becomes 1.
xwoba_df = pd.read_json(xwoba_url).assign(
    rn_fwd=lambda frame: (
        frame.groupby("player_name")["rn"]
        .rank(method="first", ascending=False)
        .astype(int)
    )
)

0.34494701986754966

In [124]:
# Add a new column with the league average to every row
xwoba_df["lg_avg"] = lg_avg_xwoba

# Upper bound of the y-axis: never below 0.5, widened to about the next tenth
# when the data goes higher. A fixed [0, 0.5] domain lets any xwOBA above 0.5
# be drawn outside the plotting area, because marks are not clipped to the
# scale domain by default. NaN/empty data falls back to 0.5.
y_max = max(0.5, round(float(xwoba_df["xwoba"].max()) + 0.05, 1))

# Base chart using the same data source for both layers
base = alt.Chart(xwoba_df)

# Player xwOBA trend line
player_line = (
    base.mark_line(color="#1f77b4")
    .encode(
        x=alt.X("rn_fwd", title="", scale=alt.Scale(zero=False)),
        y=alt.Y("xwoba", scale=alt.Scale(domain=[0, y_max]), title="xwOBA"),
    )
    .properties(width=100, height=100)
)

# League average rule—using the new 'lg_avg' field.
avg_line = base.mark_rule(color="gray", strokeDash=[4, 2]).encode(y=alt.Y("lg_avg:Q"))

# Layer both charts and force a shared y-scale
layered = alt.layer(player_line, avg_line).resolve_scale(y="shared")

# Facet by player_name: one small panel per player, seven panels per row
chart = (
    layered.facet(facet=alt.Facet("player_name:N", title=None), columns=7)
    .properties(title="xwOBA - Last 50 plate appearances")
    .configure_view(stroke=None)
    .configure_axis(labelFontSize=10, titleFontSize=12)
)

# Last expression of the cell, so the chart is rendered as its output
chart

0.34494701986754966


In [121]:
# Report the observed xwOBA range
xwoba_values = xwoba_df["xwoba"]
print("Min xwOBA:", xwoba_values.min())
print("Max xwOBA:", xwoba_values.max())

Min xwOBA: 0.17776524
Max xwOBA: 0.5203146200000001

Min xwOBA: 0.17776524
Max xwOBA: 0.5203146200000001


In [57]:
# Fetch the leaderboard HTML page and inspect the "expected_stats" container
# exactly as the server returns it.
response = requests.get(
    "https://baseballsavant.mlb.com/leaderboard/expected_statistics",
    timeout=30,  # do not let a stalled connection hang the notebook
)
# Fail on an HTTP error status instead of silently parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
soup.find("div", id="expected_stats")

<div class="table-savant" id="expected_stats" style="font-size: 12px"></div>

In [71]:
# Display the league-average xwOBA (mean "est_woba" of the rows with pa >= 50).
lg_avg_xwoba

0.34494701986754966