# Dodgers Data Bot
> This notebook is a sketchpad for data collected in this project. Nothing to see here! 

---

In [35]:
import os
import requests
import time
import pandas as pd
import jupyter_black
import altair as alt
from IPython.display import Image
from tqdm.notebook import tqdm
from bs4 import BeautifulSoup
from io import StringIO
import time

In [36]:
# Activate the jupyter_black extension for this kernel session.
jupyter_black.load()

# Relax pandas display limits so wide/long frames and long cell values are shown in full.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Remove Altair's row limit; kept as the last expression so the cell output is unchanged.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [37]:
# Value of the AWS_PERSONAL_PROFILE environment variable; None when it is not set.
profile_name = os.environ.get("AWS_PERSONAL_PROFILE", None)

---

In [47]:
# Browser-like request headers sent with the stats request
# (presumably needed for the endpoint to respond as it does for mlb.com — TODO confirm).
headers = {
    "sec-ch-ua-platform": '"macOS"',
    "Referer": "https://www.mlb.com/",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
    "sec-ch-ua": '"Not A(Brand";v="8", "Chromium";v="132", "Google Chrome";v="132"',
    "sec-ch-ua-mobile": "?0",
}

# Team stats endpoint. The query string asks for group=hitting, season=2025,
# gameType=R, sorted by homeRuns descending, with limit=30 rows.
url = "https://bdfed.stitch.mlbinfra.com/bdfed/stats/team?&env=prod&sportId=1&gameType=R&group=hitting&order=desc&sortStat=homeRuns&stats=season&season=2025&limit=30&offset=0"

# Seconds to wait for the server before giving up. Without a timeout,
# requests can block forever and stall a "Run All".
REQUEST_TIMEOUT = 30

response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail here with a clear HTTP error instead of a confusing JSON/KeyError further down.
response.raise_for_status()

# The payload's "stats" entry is loaded as one DataFrame row per record.
stats_json = response.json()["stats"]
stats_df = pd.DataFrame(stats_json)

In [55]:
# Columns that describe the row (team, league, season) rather than measure
# performance; these are never ranked.
metadata_cols = [
    "year",
    "type",
    "rank",
    "shortName",
    "teamId",
    "teamAbbrev",
    "teamName",
    "teamShortName",
    "leagueAbbrev",
    "leagueName",
    "leagueShortName",
]

# Every remaining column is treated as a statistic.
stats_cols = [col for col in stats_df.columns if col not in metadata_cols]

# Ranking direction per stat: "desc" means a higher value is better (rank 1 = highest),
# "asc" means a lower value is better (rank 1 = lowest).
rank_order = {
    "gamesPlayed": "desc",  # more games is usually better (more opportunities)
    "groundOuts": "asc",  # fewer outs are better
    "airOuts": "asc",  # fewer outs are better
    "runs": "desc",
    "doubles": "desc",
    "triples": "desc",
    "homeRuns": "desc",
    "strikeOuts": "asc",  # fewer strikeouts is good
    "baseOnBalls": "desc",
    "intentionalWalks": "desc",
    "hits": "desc",
    "hitByPitch": "desc",  # getting on base is positive
    "avg": "desc",
    "atBats": "desc",  # more opportunities (ambiguous, but using desc)
    "obp": "desc",
    "slg": "desc",
    "ops": "desc",
    "caughtStealing": "asc",  # fewer caught stealing is better
    "stolenBases": "desc",
    "stolenBasePercentage": "desc",
    "groundIntoDoublePlay": "asc",  # fewer double plays is better
    "numberOfPitches": "desc",  # more pitches usually means more plate appearances
    "plateAppearances": "desc",
    "totalBases": "desc",
    "rbi": "desc",
    "leftOnBase": "asc",  # fewer left on base is preferred
    "sacBunts": "asc",  # fewer sacrifices is often better
    "sacFlies": "asc",  # fewer sac flies is better
    "babip": "desc",
    "groundOutsToAirouts": "asc",  # lower ratio is assumed better here
    "catchersInterference": "asc",  # fewer interferences is good
    "atBatsPerHomeRun": "asc",  # lower is clearly better
}

# Only rank stats that have a declared direction. Indexing rank_order with every
# non-metadata column raises KeyError as soon as the API returns a column that is
# not listed above; such columns are collected in unranked_cols for inspection.
ranked_cols = [stat for stat in stats_cols if stat in rank_order]
unranked_cols = [stat for stat in stats_cols if stat not in rank_order]

# Rank every row of stats_df against all other rows, per stat.
# - pd.to_numeric: if the API delivers a stat as strings (NOTE(review): not confirmed
#   from this notebook), ranking the raw values would be lexicographic, not numeric.
#   Unparseable values become NaN.
# - na_option="bottom": NaN values get the worst rank instead of a NaN rank, so the
#   integer cast below cannot fail.
# - method="min": tied rows share the best (lowest) rank of their group.
stats_rank = pd.DataFrame(
    {
        stat: pd.to_numeric(stats_df[stat], errors="coerce").rank(
            method="min",
            ascending=(rank_order[stat] == "asc"),
            na_option="bottom",
        )
        for stat in ranked_cols
    },
    index=stats_df.index,
)

# Stats to surface in the summary table.
keep_cols = [
    "runs",
    "avg",
    "obp",
    "slg",
    "doubles",
    # "triples",
    "homeRuns",
    "atBatsPerHomeRun",
    "baseOnBalls",
    "stolenBases",
    "stolenBasePercentage",
]

# Select the Dodgers row by team abbreviation.
TEAM_ABBREV = "LAD"
dodgers_filter = stats_df["teamAbbrev"] == TEAM_ABBREV
if not dodgers_filter.any():
    # Same exception type that .iloc[0] would raise on an empty selection,
    # but with a message that says what is actually missing.
    raise IndexError(f"No row with teamAbbrev == {TEAM_ABBREV!r} in stats_df")

dodgers_stats = stats_df.loc[dodgers_filter, ranked_cols].iloc[0]
dodgers_ranks = stats_rank.loc[dodgers_filter].iloc[0].astype(int)

# One row per kept stat: the Dodgers' raw value and their rank among all rows of
# stats_df. Rows follow the column order of stats_df, not the order of keep_cols.
dodgers_results = (
    pd.DataFrame(
        {
            "stat": ranked_cols,
            "value": dodgers_stats.values,
            "league_rank": dodgers_ranks.values,
        }
    )
    # Boolean mask instead of interpolating a Python list repr into a query string.
    .loc[lambda df: df["stat"].isin(keep_cols)]
    .reset_index(drop=True)
)

In [56]:
# Display the Dodgers summary table: one row per kept stat with its value and rank.
dodgers_results

Unnamed: 0,stat,value,league_rank
0,runs,79.0,9
1,doubles,20.0,21
2,homeRuns,31.0,2
3,baseOnBalls,62.0,7
4,avg,0.23,17
5,obp,0.31,13
6,slg,0.431,8
7,stolenBases,9.0,22
8,stolenBasePercentage,0.75,19
9,atBatsPerHomeRun,18.81,3
