# Dodgers hitting by season: 1884-present, via [stathead](https://stathead.com/baseball/team-batting-season-finder.cgi?request=1&match=team_season&order_by_asc=0&order_by=year_id&comp_type=reg&team_id=LAD)

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import numpy as np

In [3]:
# Register the altair_stiles theme under the name "stiles", then pick the active theme.
# NOTE(review): the theme enabled here is "grid", not the "stiles" theme that was
# just registered — confirm this is intentional.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Let pandas print up to 1000 columns and 1000 rows before truncating.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
# Turn off Altair's max-rows check; kept last so the cell still echoes its result.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [5]:
# Current date as an ISO "YYYY-MM-DD" string.
today = pd.to_datetime("today").date().isoformat()

---

## Read data 

In [6]:
# Load the raw season export. "Season" is read as text (later cells slice it and
# match it against year strings), and every column name is lower-cased.
src = pd.read_csv(
    "data/raw/dodgers-hitting-season-stathead-1884-present.csv",
    dtype={"Season": str},
).rename(columns=str.lower)

#### Clean up

In [7]:
# Discard the "rk" and "lg" columns, which are not used in this notebook.
src = src.drop(columns=["rk", "lg"])

#### Decades

In [8]:
# Decade label from the first three characters of the season, e.g. "2022" -> "2020s".
src["decade"] = src["season"].str.slice(stop=3) + "0s"

#### World Series

In [9]:
# Seasons flagged as World Series wins, kept as strings to match the "season" column.
series_wins = [str(year) for year in (2020, 1988, 1981, 1965, 1963, 1959, 1955)]

In [10]:
# Start every season as "No win"; the next cell overwrites the winning seasons.
src = src.assign(series_win="No win")

In [11]:
# Relabel the seasons listed in series_wins; all other rows keep their current value.
won_series = src["season"].isin(series_wins)
src["series_win"] = src["series_win"].mask(won_series, "World Series win")

In [12]:
# Per-game rates: each counting stat divided by games played ("gp"), rounded to
# two decimals and stored as "<stat>/gm".
for stat in ("hr", "h", "bb", "sb"):
    src[f"{stat}/gm"] = src[stat].div(src["gp"]).round(2)

In [13]:
# Preview the first rows, including the derived decade, series_win and per-game columns.
src.head()

Unnamed: 0,season,team,gp,w,l,wl%,bat#,pa,ab,r,h,1b,2b,3b,hr,rbi,sb,cs,bb,so,ba,obp,slg,ops,ops+,tb,gidp,hbp,sh,sf,ibb,lob,r/gm,decade,series_win,hr/gm,h/gm,bb/gm,sb/gm
0,2022,Los Angeles Dodgers,131,90,41,0.687,29,5059,4473,695,1162,694,273,25,170,662.0,84.0,14.0,493,1096.0,0.26,0.336,0.446,0.782,114,1995,75.0,45,1.0,45.0,16.0,936.0,5.3,2020s,No win,1.3,8.87,3.76,0.64
1,2021,Los Angeles Dodgers,162,106,56,0.654,61,6239,5445,830,1330,822,247,24,237,799.0,65.0,17.0,613,1408.0,0.244,0.33,0.429,0.759,101,2336,96.0,104,32.0,45.0,36.0,1169.0,5.1,2020s,No win,1.46,8.21,3.78,0.4
2,2020,Los Angeles Dodgers,60,43,17,0.717,19,2316,2042,349,523,302,97,6,118,327.0,29.0,8.0,228,471.0,0.256,0.338,0.483,0.821,119,986,46.0,30,3.0,12.0,7.0,401.0,5.8,2020s,World Series win,1.97,8.72,3.8,0.48
3,2019,Los Angeles Dodgers,162,106,56,0.654,46,6282,5493,886,1414,813,302,20,279,861.0,57.0,10.0,607,1356.0,0.257,0.338,0.472,0.81,111,2593,100.0,81,55.0,45.0,47.0,1124.0,5.5,2010s,No win,1.72,8.73,3.75,0.35
4,2018,Los Angeles Dodgers,163,92,71,0.564,52,6358,5572,804,1394,830,296,33,235,756.0,75.0,24.0,647,1436.0,0.25,0.333,0.442,0.774,109,2461,119.0,61,39.0,39.0,47.0,1163.0,4.9,2010s,No win,1.44,8.55,3.97,0.46


#### Make a copy

In [14]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
df = src.copy(deep=True)

---

#### Since the move from Brooklyn to Los Angeles

In [15]:
# Keep only the seasons played under the "Los Angeles Dodgers" team name.
is_la_season = df["team"].eq("Los Angeles Dodgers")
modern = df.loc[is_la_season].copy()

In [16]:
# Preview the first rows of the Los Angeles-only subset.
modern.head()

Unnamed: 0,season,team,gp,w,l,wl%,bat#,pa,ab,r,h,1b,2b,3b,hr,rbi,sb,cs,bb,so,ba,obp,slg,ops,ops+,tb,gidp,hbp,sh,sf,ibb,lob,r/gm,decade,series_win,hr/gm,h/gm,bb/gm,sb/gm
0,2022,Los Angeles Dodgers,131,90,41,0.687,29,5059,4473,695,1162,694,273,25,170,662.0,84.0,14.0,493,1096.0,0.26,0.336,0.446,0.782,114,1995,75.0,45,1.0,45.0,16.0,936.0,5.3,2020s,No win,1.3,8.87,3.76,0.64
1,2021,Los Angeles Dodgers,162,106,56,0.654,61,6239,5445,830,1330,822,247,24,237,799.0,65.0,17.0,613,1408.0,0.244,0.33,0.429,0.759,101,2336,96.0,104,32.0,45.0,36.0,1169.0,5.1,2020s,No win,1.46,8.21,3.78,0.4
2,2020,Los Angeles Dodgers,60,43,17,0.717,19,2316,2042,349,523,302,97,6,118,327.0,29.0,8.0,228,471.0,0.256,0.338,0.483,0.821,119,986,46.0,30,3.0,12.0,7.0,401.0,5.8,2020s,World Series win,1.97,8.72,3.8,0.48
3,2019,Los Angeles Dodgers,162,106,56,0.654,46,6282,5493,886,1414,813,302,20,279,861.0,57.0,10.0,607,1356.0,0.257,0.338,0.472,0.81,111,2593,100.0,81,55.0,45.0,47.0,1124.0,5.5,2010s,No win,1.72,8.73,3.75,0.35
4,2018,Los Angeles Dodgers,163,92,71,0.564,52,6358,5572,804,1394,830,296,33,235,756.0,75.0,24.0,647,1436.0,0.25,0.333,0.442,0.774,109,2461,119.0,61,39.0,39.0,47.0,1163.0,4.9,2010s,No win,1.44,8.55,3.97,0.46


---

#### Mean batting stats, by decade

In [22]:
# Average the per-game rates within each decade of the Los Angeles era, rounded to
# two decimals, with "decade" returned as an ordinary column.
per_game_rates = ["r/gm", "hr/gm", "bb/gm", "sb/gm"]
decades = modern.groupby("decade")[per_game_rates].mean().round(2).reset_index()

In [23]:
# Display the decade-level averages.
decades

Unnamed: 0,decade,r/gm,hr/gm,bb/gm,sb/gm
0,1950s,4.4,1.04,3.5,0.5
1,1960s,3.94,0.65,3.06,0.68
2,1970s,4.33,0.78,3.38,0.74
3,1980s,3.95,0.76,3.05,0.79
4,1990s,4.32,0.88,3.22,0.83
5,2000s,4.51,1.0,3.31,0.64
6,2010s,4.42,1.07,3.39,0.52
7,2020s,5.4,1.58,3.78,0.51


---

#### Runs per game, with World Series wins highlighted

In [19]:
# Bar chart of runs per game for each Los Angeles season. Bars are colored by
# whether the team won the World Series that year (dark blue) or not (light blue).
# The mark-level color="" was removed: an empty string is not a valid color, and
# the `color` encoding below overrides any mark-level color anyway.
alt.Chart(modern).mark_bar(size=10).encode(
    # "season" holds year strings; the :T shorthand plots them on a temporal axis.
    x=alt.X("season:T", axis=alt.Axis(), title=" "),
    y=alt.Y("r/gm", title=" ", axis=alt.Axis(tickCount=6)),
    color=alt.Color(
        "series_win",
        title=" ",
        # Explicit domain/range pins each category to its color and legend order.
        scale=alt.Scale(
            domain=["World Series win", "No win"], range=["#005A9C", "#1E90FF"]
        ),
    ),
).properties(title="LA Dodgers: Runs per game since move to L.A.").configure_legend(
    orient="top"
)

#### Home runs

In [20]:
# Bar chart of home runs per game for each Los Angeles season. Bars are colored by
# whether the team won the World Series that year (dark blue) or not (light blue).
# The mark-level color="" was removed: an empty string is not a valid color, and
# the `color` encoding below overrides any mark-level color anyway.
alt.Chart(modern).mark_bar(size=10).encode(
    # "season" holds year strings; the :T shorthand plots them on a temporal axis.
    x=alt.X("season:T", axis=alt.Axis(), title=" "),
    y=alt.Y("hr/gm", title=" ", axis=alt.Axis(tickCount=6)),
    color=alt.Color(
        "series_win",
        title=" ",
        # Explicit domain/range pins each category to its color and legend order.
        scale=alt.Scale(
            domain=["World Series win", "No win"], range=["#005A9C", "#1E90FF"]
        ),
    ),
).properties(
    title="LA Dodgers: Home runs per game since move to L.A."
).configure_legend(
    orient="top"
)

---

## Export

In [21]:
# Write the full table and the Los Angeles-era subset to CSV, omitting the index.
exports = {
    "data/processed/dodgers_games_seasons.csv": df,
    "data/processed/dodgers_games_seasons_since_la_move.csv": modern,
}
for out_path, frame in exports.items():
    frame.to_csv(out_path, index=False)