# LA Dodgers Standings, 1958-2024
> This notebook visualizes the team's historic standings with data from [Baseball Reference](https://www.baseball-reference.com/teams/LAD/2024-schedule-scores.shtml).

---

In [72]:
import os
import pandas as pd
import jupyter_black
import altair as alt
import altair_stiles as altstiles

In [73]:
# Notebook-wide setup: cell auto-formatting, pandas display limits, Altair theme.
jupyter_black.load()

# Widen pandas display limits so frames inspected below are not truncated.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Lift Altair's max-rows limit, then register and activate the "stiles" theme.
alt.data_transformers.disable_max_rows()
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

In [75]:
# AWS CLI profile name, read from the environment rather than hard-coded here.
# It is None when AWS_PERSONAL_PROFILE is not set.
profile_name = os.getenv("AWS_PERSONAL_PROFILE")

'haekeo'

---

## Fetch

#### Read historical archive, compiled in notebooks `00` and `01`

In [3]:
# Load the processed standings archive produced by the earlier notebooks.
df = pd.read_parquet(
    path="data/processed/dodgers_standings_1958_present.parquet",
)

In [4]:
# Game number ("gm") of the row carrying the most recent game_date in the archive.
game_number = df.loc[df["game_date"] == df["game_date"].max(), "gm"].iloc[0]

In [5]:
# Preview the first `game_number` rows of the archive.
df.iloc[:game_number]

Unnamed: 0,gm,game_date,home_away,opp,result,r,ra,record,rank,gb,time,day_night,attendance,year
0,10,2024-04-05,away,CHC,L,7.0,9.0,7-3,1,2.0,2:57,D,34981.0,2024
1,9,2024-04-03,home,SFG,W,5.0,4.0,7-2,1,2.0,2:25,N,52746.0,2024
2,8,2024-04-02,home,SFG,W,5.0,4.0,6-2,1,1.0,2:57,N,49365.0,2024
3,7,2024-04-01,home,SFG,W,8.0,3.0,5-2,1,1.0,2:38,N,49044.0,2024
4,6,2024-03-31,home,STL,W,5.0,4.0,4-2,1,0.0,2:41,D,41014.0,2024
5,5,2024-03-30,home,STL,L,5.0,6.0,3-2,1,0.0,3:17,N,45019.0,2024
6,4,2024-03-29,home,STL,W,6.0,3.0,3-1,1,0.0,2:23,N,47524.0,2024
7,3,2024-03-28,home,STL,W,7.0,1.0,2-1,1,0.0,2:23,D,52667.0,2024
8,2,2024-03-21,home,SDP,L,11.0,15.0,1-1,1,0.0,3:42,N,15928.0,2024
9,1,2024-03-20,away,SDP,W,5.0,2.0,1-0,1,0.5,3:05,N,15952.0,2024


---

## Comparing seasons to this point

#### Limit dataframe to latest game number this season

In [6]:
# Keep only games numbered at or below the latest game number, for every season.
# The copy lets later cells modify columns without touching `df`.
limit_df = df.loc[df["gm"] <= game_number].copy()

In [30]:
# One x-scale object shared by both line layers so their game-number domains
# always agree.
game_number_scale = alt.Scale(domain=[0, 164])

# Every season except 2024, drawn as thin light-gray lines (one per year).
past = (
    alt.Chart(df.query("year != '2024'"))
    .mark_line(size=0.8)
    .encode(
        x=alt.X(
            "gm",
            title="Game number in season",
            axis=alt.Axis(values=[20, 40, 60, 80, 100, 120, 140, 160]),
            scale=game_number_scale,
        ),
        y=alt.Y("gb:Q", title="Games back by game in the season: 1958-2024"),
        # A single-color range renders every year's line in the same gray.
        color=alt.Color("year", scale=alt.Scale(range=["#e2e2e2"]), legend=None),
    )
    .properties(
        width=650,
        height=400,
        title="LA Dodgers historical standings",
    )
)

# The 2024 season, drawn as a thicker blue line on top of the past seasons.
current = (
    alt.Chart(df.query("year == '2024'"))
    .mark_line(size=2, color="#005A9C")
    .encode(
        x=alt.X("gm", scale=game_number_scale),
        y="gb:Q",
    )
)

# Thin horizontal reference rule at y = 0.
hline = (
    alt.Chart(pd.DataFrame({"y": [0]}))
    .mark_rule(color="black", strokeWidth=0.5)
    .encode(y="y")
)

# "Leading" label anchored at y = 0, sitting on top of the reference rule.
text = (
    alt.Chart(pd.DataFrame({"y": [0], "text": ["Leading ↑"]}))
    .mark_text(
        color="#666666",
        align="center",
        baseline="bottom",
        dy=0,  # No vertical offset: the bottom baseline already rests on the rule
        dx=300,  # Horizontal offset of the label, in pixels
        fontSize=10,
        fontWeight="bold",
    )
    .encode(y="y:Q", text="text:N")
)

# "Past seasons" label anchored at y = 20 and nudged by pixel offsets.
anno_text = (
    alt.Chart(pd.DataFrame({"y": [20], "text": ["Past seasons"]}))
    .mark_text(
        color="#666666",
        align="center",
        baseline="bottom",
        dy=55,  # Vertical pixel offset from the y = 20 anchor
        dx=100,  # Horizontal pixel offset of the label
        fontSize=10,
        fontWeight="bold",
    )
    .encode(y="y:Q", text="text:N")
)

# Layer order is draw order: gray history, zero rule, 2024 line, then labels.
chart = past + hline + current + text + anno_text

# Show the chart
chart

In [8]:
# One bar per season showing games back ("gb") at the current game number.
# Bars are blue where gb > 0 and light gray otherwise.
alt.Chart(limit_df.query(f"gm == {game_number}")).mark_bar().encode(
    # The x channel is built with alt.X so the channel class matches the channel.
    x=alt.X(
        "year:O",
        axis=alt.Axis(
            values=["1960", "1970", "1980", "1990", "2000", "2010", "2024"],
            title="",
        ),
    ),
    y=alt.Y("gb:Q", title=""),
    color=alt.condition(
        alt.datum.gb > 0,
        alt.value("#005A9C"),
        alt.value("#e9e9e9"),
    ),
).properties(
    width=650,
    height=200,
    title=f"LA Dodgers historical standings: Games back by game {game_number} of the season: 1958-2024",
)

---

## Scoring

#### Group by season and sum runs, runs against

In [67]:
# Per-season totals: runs scored, runs allowed, and the number of game rows.
# NOTE(review): despite the "_limit" name this aggregates the full `df`,
# not `limit_df` — confirm that is intended.
runs_season_limit = (
    df.groupby("year")
    .agg(
        runs=("r", "sum"),
        runs_against=("ra", "sum"),
        games=("gm", "size"),
    )
    .reset_index()
)

#### Runs and runs against per game

In [68]:
# Average runs scored per game in each season, rounded to two decimals.
runs_season_limit["runs_per_game"] = (
    runs_season_limit["runs"].div(runs_season_limit["games"]).round(2)
)

In [69]:
# Average runs allowed per game in each season, rounded to two decimals.
runs_season_limit["runs_against_per_game"] = (
    runs_season_limit["runs_against"].div(runs_season_limit["games"]).round(2)
)

#### Difference

In [None]:
# Per-game run differential: runs scored per game minus runs allowed per game.
# Both inputs are already rounded to two decimals; rounding the difference too
# removes binary floating-point noise (e.g. 0.41000000000000014) and keeps this
# column consistent with the other per-game columns.
runs_season_limit["runs_per_game_diff"] = (
    runs_season_limit["runs_per_game"] - runs_season_limit["runs_against_per_game"]
).round(2)

---

#### Runs scored to this point

In [11]:
# Cast runs scored to integers so the totals computed below are whole numbers.
# NOTE(review): this presumably fails if any `r` value is missing — confirm the
# archive has no NaN runs. This also mutates `limit_df` in place.
limit_df["r"] = limit_df["r"].astype(int)

In [12]:
# Total runs scored per season within the game-limited frame, newest year first.
runs_so_far = (
    limit_df.groupby("year", as_index=False)["r"]
    .sum()
    .rename(columns={"r": "runs_to_date"})
    .sort_values(by="year", ascending=False)
)

In [13]:
# Runs-to-date for the latest year in the table, as a plain Python int.
runs_this_season = int(
    runs_so_far.loc[
        runs_so_far["year"] == runs_so_far["year"].max(), "runs_to_date"
    ].iloc[0]
)

In [14]:
# Title shared by the base layer and the final layered chart.
runs_chart_title = f"Dodgers historical offense: Total runs through game {game_number}, 1958-2024"

# Shared encodings: one row per season, sorted by runs to date (descending),
# with the 2024 row highlighted and every other season in light gray.
base = (
    alt.Chart(runs_so_far)
    .encode(
        x=alt.X(
            "runs_to_date",
            title=f"Runs by game no. {game_number}",
            axis=alt.Axis(tickCount=6),
        ),
        y=alt.Y("year:O", title="").sort("-x"),
        color=alt.condition(
            alt.datum.year == "2024",
            alt.value("steelblue"),
            alt.value("#e3e3e3"),
        ),
        text=alt.Text("runs_to_date", title=""),
    )
    .properties(
        height=1100,
        width=650,
        title=runs_chart_title,
    )
)

# Vertical rule marking this season's run total.
vertical_line = (
    alt.Chart(pd.DataFrame({"x": [runs_this_season]}))
    .mark_rule(color="black", size=0.5)
    .encode(
        x="x:Q",
    )
)

# Label for the rule, anchored at the latest year's row and shifted by pixel
# offsets.
text_annotation = (
    alt.Chart(pd.DataFrame({"x": [runs_this_season], "y": [runs_so_far["year"].max()]}))
    .mark_text(
        text=[f"Runs this season: {runs_this_season}"],
        align="left",
        dx=5,  # Horizontal pixel offset from the rule
        dy=-1005,  # Vertical pixel offset from the anchor row (tuned to height=1100)
    )
    .encode(
        x="x:Q",
        y=alt.Y("y:O", axis=alt.Axis(title="")),
    )
)

# Layer bars, value labels, the rule and its annotation. The bar/text layers are
# built once here; the cell's last expression is the only chart displayed.
final_chart = (
    base.mark_bar(color="#005A9C")
    + base.mark_text(align="left", dx=2, color="#000")
    + vertical_line
    + text_annotation
)

# Set size and title on the combined chart and display it.
final_chart.properties(
    height=1100,
    width=650,
    title=runs_chart_title,
)

---

## Summary

In [None]:
# TODO: summary values still to be computed for this section.
# Kept as comments so this code cell stays valid Python under "Run All".
# - Games so far
# - Record
# - Home vs. away
# - Runs scored
# - Position


---

## Exports

In [None]:
# NOTE(review): disabled leftover command — it copies a tree-locations JSON file
# to S3, which appears unrelated to the standings data in this notebook. Update
# the source and destination paths before enabling it.
# !aws s3 cp ../data/processed/la_county_tree_locations.json s3://stiles-data/trees/la_county_tree_locations.json --profile {profile_name}