# LA Dodgers toplines
> This notebook extracts key statistics from the project's processed tables for display in a dashboard.

---

#### Import Python tools and Jupyter config

In [8]:
import pandas as pd
import jupyter_black
import altair as alt

In [9]:
# Auto-format code cells with black as they are run.
jupyter_black.load()

# Loosen pandas display limits so wide/long tables and long strings are not truncated.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

---

## Read

#### Standings

In [320]:
# Download the full standings history once, then split it by season.
# (Reading the same remote parquet twice doubles the network cost and could
# return two different snapshots if the file is updated between requests.)
standings_all = pd.read_parquet(
    "https://stilesdata.com/dodgers/data/standings/dodgers_standings_1958_present.parquet"
)

# Current season vs. every earlier season. `year` is compared as a string here.
standings = standings_all.query("year == '2024'")
standings_past = standings_all.query("year != '2024'")

# Row(s) for the most recent game date in the current season.
standings_now = standings.query("game_date == game_date.max()")

In [80]:
# Team batting table; the file name indicates it spans 1958 to the present.
batting = pd.read_parquet(
    path="https://stilesdata.com/dodgers/data/batting/dodgers_team_batting_1958_present.parquet"
)

In [111]:
# Split the batting table into the current season and everything before it.
# Copies are taken so later column assignments do not touch `batting` itself.
batting_now = batting[batting["season"] == "2024"].copy()
batting_past = batting[batting["season"] != "2024"].copy()

---

## Key statistics

#### 1. Current season record (Wins-Losses)
> Provides an immediate understanding of the team's overall performance for the season.

In [235]:
# Season-to-date totals are read from the most recent game's row in `standings_now`
# (the frame built in the Read section). `.iloc[0]` selects by position, so it works
# regardless of which index label that row carries.
games = standings_now["gm"].iloc[0]
wins = standings_now["wins"].iloc[0]
losses = standings_now["losses"].iloc[0]

In [236]:
# Win-loss record string as of the most recent game, taken from `standings_now`
# by position so the row's index label does not matter.
record = standings_now["record"].iloc[0]

#### 2. Win percentage
> Allows for normalization of success to compare across different seasons or different numbers of games played.

In [346]:
# Current winning percentage as a whole number.
# Round before casting: int() truncates, and binary floats such as 0.57 * 100
# evaluate to 56.999..., which would otherwise be reported as 56.
win_pct = int(round(standings_now["win_pct"].iloc[0] * 100))

# Mean winning percentage at this same game number across the first ten matching
# rows of past seasons (assumes `standings_past` is ordered most recent season
# first — TODO confirm).
win_pct_decade_thispoint = int(
    round(
        standings_past.query(f"gm == {games}").head(10)["win_pct"].mean().round(2)
        * 100
    )
)

#### 3. Run differential
> A positive run differential generally correlates with a stronger team performance and is predictive of future success.

In [238]:
# Season totals: sum the per-game `r` and `ra` columns of the current-season standings.
runs = standings.loc[:, "r"].sum()
runs_against = standings.loc[:, "ra"].sum()

In [None]:
# Run differential: season runs scored minus season runs allowed
# (positive when the team has outscored its opponents).
run_diff = runs - runs_against

#### 4. Home runs and home runs per game
> Reflects the team's power-hitting capabilities, significant for scoring strategies.

In [240]:
# Home runs per game for each past season, rounded to two decimals.
# Both columns are cast to int before dividing.
batting_past["hr_game"] = (
    batting_past["hr"].astype(int).div(batting_past["g"].astype(int)).round(2)
)

In [241]:
# Current-season home run total. Cast to int before summing, as the other batting
# cells do: if `hr` is stored as text, summing it uncast would concatenate the
# values instead of adding them.
home_runs = int(batting_now["hr"].astype(int).sum())

# Home runs per game so far this season (`games` comes from the standings section).
home_runs_game = (home_runs / games).round(2)

# Last season's rate, read from the `hr_game` column added to `batting_past` above.
home_runs_game_last = batting_past.query('season == "2023"')["hr_game"].iloc[0]

In [242]:
# Games played and home runs hit across the first ten rows of `batting_past`
# (presumably the ten most recent past seasons — verify the table's sort order).
games_decade = batting_past["g"].head(10).astype(int).sum()
home_runs_decade = batting_past["hr"].head(10).astype(int).sum()

In [243]:
# Home runs per game over the ten-row window: total home runs divided by total
# games, rounded to two decimals.
home_runs_game_decade = (home_runs_decade / games_decade).round(2)

#### 5. Earned run average (ERA)
> A key measure of pitching staff effectiveness, with a lower ERA indicating better performance.

#### 6. Batting average
> Summarizes players' strength in getting on base — and hopefully scoring runs.

In [314]:
# Team batting average for the current season: first row of `batting_now`, `ba` column.
batting_average = batting_now["ba"].iat[0]

In [310]:
# Mean batting average over the first ten rows of `batting_past`.
decade_ba_mean = batting_past.head(10)["ba"].astype(float).mean()

# Format to three decimals and drop only the leading zero (0.25 -> ".250").
# Fixed-width formatting keeps trailing zeros that round()+str() would discard,
# and lstrip touches nothing but the leading character.
batting_average_decade = f"{decade_ba_mean:.3f}".lstrip("0")

#### 7. Stolen bases
> Stolen bases can significantly impact game dynamics and indicate the team's strategic play.

In [244]:
# Current-season stolen bases: first row of `batting_now`, cast to int.
stolen_bases = int(batting_now["sb"].iat[0])

# Stolen bases per game so far this season, rounded to two decimals.
stolen_bases_game = (stolen_bases / games).round(2)

In [245]:
# Totals across the first ten rows of `batting_past`
# (presumably the ten most recent past seasons — verify the table's sort order).
games_decade = batting_past.iloc[:10]["g"].astype(int).sum()
stolen_decade = batting_past.iloc[:10]["sb"].astype(int).sum()

# Stolen bases per game over that window, rounded to two decimals.
stolen_bases_decade_game = (stolen_decade / games_decade).round(2)

#### 8. Fielding percentage
> Indicates the team's defensive capabilities, with a higher percentage reflecting better performance.

#### 9. Recent trend (last 10 games)
> Provides insight into the team's current form and momentum, which is essential for assessing changes in performance.

In [246]:
# Results for the first ten rows of this season's standings
# (assumes rows are ordered most recent game first — TODO confirm).
last_10 = standings["result"].head(10)

# Count the rows marked "W" and "L" within that window.
win_count_trend = last_10.loc[last_10.eq("W")].count()
loss_count_trend = last_10.loc[last_10.eq("L")].count()

In [247]:
# One-line text tally of the wins and losses counted in the `last_10` window.
win_loss_trend = f"Recent trend: {win_count_trend} wins, {loss_count_trend} losses"

#### 10. Summary
> Creates one file to import for topline statistics and a narrative summary of the standings now.

In [276]:
# Describe the latest game by its actual outcome instead of always calling it a loss.
# `result` values starting with "W" are treated as wins; anything else keeps the
# original "loss to" wording.
last_game_won = str(standings_now["result"].iloc[0]).startswith("W")
last_game_outcome = "win over" if last_game_won else "loss to"

# Narrative summary: games played, record, win percentage, the latest game's score,
# venue, opponent and attendance (with thousands separators), and the recent trend.
summary = (
    f"The Dodgers have played {games} games this season compiling a {record} record — "
    f"a winning percentage of {win_pct}%. "
    f"The team's last game was a {standings_now['r'].iloc[0]}-{standings_now['ra'].iloc[0]} "
    f"{standings_now['home_away'].iloc[0]} {last_game_outcome} the {standings_now['opp'].iloc[0]} "
    f"in front of {standings_now['attendance'].iloc[0]:,} fans. "
    f"The team has won {win_count_trend} of its last 10 games."
)

In [None]:
# Topline statistics, written one row per stat as (stat, stat_value, category) so each
# label sits next to its value and category. The rows are then transposed into the
# three parallel lists keyed "stat", "stat_value" and "category".
summary_data = {
    column: list(values)
    for column, values in zip(
        ("stat", "stat_value", "category"),
        zip(
            *[
                ("wins", wins, "standings"),
                ("losses", losses, "standings"),
                ("record", record, "standings"),
                ("win_pct", f"{win_pct}%", "standings"),
                (
                    "win_pct_decade_thispoint",
                    f"{win_pct_decade_thispoint}%",
                    "standings",
                ),
                ("runs", runs, "standings"),
                ("runs_against", runs_against, "standings"),
                ("run_differential", run_diff, "standings"),
                ("home_runs", home_runs, "batting"),
                ("home_runs_game", home_runs_game, "batting"),
                ("home_runs_game_last", home_runs_game_last, "batting"),
                ("home_runs_game_decade", home_runs_game_decade, "batting"),
                ("stolen_bases", stolen_bases, "batting"),
                ("stolen_bases_game", stolen_bases_game, "batting"),
                ("stolen_bases_decade_game", stolen_bases_decade_game, "batting"),
                ("batting_average", batting_average, "batting"),
                ("batting_average_decade", batting_average_decade, "batting"),
                ("summary", summary, "standings"),
            ]
        ),
    )
}

In [None]:
# Tabular form of the topline stats: one row per stat, with columns
# stat, stat_value and category.
summary_df = pd.DataFrame(data=summary_data)

Unnamed: 0,stat,stat_value,category
0,wins,12,standings
1,losses,11,standings
2,record,12-11,standings
3,win_pct,52%,standings
4,win_pct_decade_thispoint,59%,standings
5,runs,114,standings
6,runs_against,109,standings
7,run_differential,5,standings
8,home_runs,26,batting
9,home_runs_game,1.13,batting
