# MLB team hitting by season: 1970-2022, via [Baseball Reference](https://www.baseball-reference.com/leagues/majors/) league pages (see also [stathead](https://stathead.com/baseball/team-batting-season-finder.cgi?request=1&match=team_season&order_by_asc=0&order_by=year_id&comp_type=reg&team_id=LAD))

In [1]:
# Load the `lab_black` IPython extension for this session
# (presumably auto-formats cells with Black — confirm).
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import numpy as np

In [3]:
# Register the altair_stiles theme under the name "stiles", then activate a theme.
# NOTE(review): the theme enabled below is "grid", not the "stiles" name
# registered on the line above — confirm which one is intended.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Let pandas display up to 1000 columns and 1000 rows before truncating output.
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
# Turn off Altair's max-rows check so the full `src` table can be charted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [5]:
# Date the notebook is run, formatted as YYYY-MM-DD.
run_timestamp = pd.to_datetime("today")
today = run_timestamp.strftime("%Y-%m-%d")

---

## Read data 

#### Loop through years, get stats

In [6]:
import time

# Seasons to download (both ends inclusive).
FIRST_SEASON = 1970
LAST_SEASON = 2022

# Pause between page requests. Firing 53 requests back-to-back risks being
# throttled or blocked; Sports Reference publishes a per-minute request cap
# (20/minute for this site at time of writing — confirm against their policy).
REQUEST_DELAY_SECONDS = 3.5

# One DataFrame per season, each tagged with a `season` column.
dataframes = []

for year in range(FIRST_SEASON, LAST_SEASON + 1):
    url = f"https://www.baseball-reference.com/leagues/majors/{year}.shtml"
    # Only the first table on the page is kept; its columns (Tm, R/G, ...)
    # are the ones the rest of the notebook works with.
    season_table = pd.read_html(url)[0].assign(season=year)
    dataframes.append(season_table)
    if year != LAST_SEASON:
        time.sleep(REQUEST_DELAY_SECONDS)

In [7]:
# Stack the per-season tables into one frame, ordered by season and then
# R/G (both descending), with a fresh 0..n-1 index.
stacked = pd.concat(dataframes)
src = stacked.sort_values(
    by=["season", "R/G"],
    ascending=False,
    ignore_index=True,
)

In [8]:
# Drop rows that are not team seasons: the "League Average" summary row,
# rows whose team cell is the literal column label "Tm", and rows with no
# team name at all.
# Missing values are tested with .isna() instead of listing `np.NaN`, an
# alias that was removed in NumPy 2.0.
not_a_team = src["Tm"].isin(["League Average", "Tm"]) | src["Tm"].isna()

# .copy() gives an independent frame, so the column assignments in later
# cells do not operate on a slice of the unfiltered data.
src = src[~not_a_team].copy()

In [9]:
# Normalise every column label to lower case ("Tm" -> "tm", "R/G" -> "r/g", ...).
src.columns = [label.lower() for label in src.columns]

In [10]:
# Stat columns that must be numeric for the arithmetic and charts below.
# The list is defined once so the selection on both sides of the assignment
# cannot drift apart.
# NOTE(review): presumably these arrive as text because the scraped tables
# contained non-numeric rows (filtered out earlier) — confirm.
numeric_columns = [
    "#bat",
    "batage",
    "r/g",
    "g",
    "pa",
    "ab",
    "r",
    "h",
    "2b",
    "3b",
    "hr",
    "rbi",
    "sb",
    "cs",
    "bb",
    "so",
    "ba",
    "obp",
    "slg",
    "ops",
    "ops+",
    "tb",
    "gdp",
    "hbp",
    "sh",
    "sf",
    "ibb",
    "lob",
]

src[numeric_columns] = src[numeric_columns].astype(float)

In [11]:
# Relabel older team names so every season of a franchise shares one `tm`
# value. Any row whose team name contains the key is rewritten to the value.
# Fixes the misspelling "Los Angeles Angles" -> "Los Angeles Angels".
# NOTE(review): the Montreal Expos are not remapped here — confirm whether
# they should be folded into the Washington Nationals.
current_team_names = {
    "Angels": "Los Angeles Angels",
    "Marlins": "Miami Marlins",
    "Rays": "Tampa Bay Rays",
    "Indians": "Cleveland Guardians",
    "Senators": "Texas Rangers",
}

for name_fragment, current_name in current_team_names.items():
    src.loc[src["tm"].str.contains(name_fragment), "tm"] = current_name

#### Clean up

In [12]:
# Add per-game rate columns (hr/gm, h/gm, bb/gm, sb/gm): each counting stat
# divided by games played, rounded to two decimals.
for stat in ("hr", "h", "bb", "sb"):
    src[f"{stat}/gm"] = src[stat].div(src["g"]).round(2)

In [13]:
# Store seasons as strings; the chart axes below list their tick values as
# strings ("1970", "1995", ...).
src["season"] = src["season"].astype(str)

#### Home runs by team

In [19]:
# Small multiples: one red area chart per team showing home runs per game
# across seasons, laid out eight panels per row.
season_axis = alt.X(
    "season:O",
    axis=alt.Axis(tickCount=6, values=["1970", "1995", "2020"]),
    title=" ",
)
hr_rate_axis = alt.Y("hr/gm", title=" ", axis=alt.Axis(tickCount=2))
team_panels = alt.Facet("tm", columns=8)

(
    alt.Chart(src)
    .mark_area(color="red")
    .encode(x=season_axis, y=hr_rate_axis, facet=team_panels)
    .properties(title="Home runs per game", width=150, height=100)
    .configure_legend(orient="top")
)

In [15]:
# Unique team names, excluding the Los Angeles Dodgers.
teams = src.loc[src["tm"].ne("Los Angeles Dodgers"), "tm"].unique()

In [29]:
# Team-by-season grid: one bar per (season, team) cell, coloured by home
# runs per game, with the colour legend placed above the chart.
decade_ticks = ["1970", "1980", "1990", "2000", "2010", "2020"]

season_axis = alt.X(
    "season:O",
    axis=alt.Axis(tickCount=6, values=decade_ticks),
    title=" ",
)
team_axis = alt.Y("tm", title=" ")
hr_rate_color = alt.Color(
    "hr/gm:Q",
    scale=alt.Scale(scheme="yelloworangered"),
    title="Home runs per game",
)

(
    alt.Chart(src)
    .mark_bar()
    .encode(x=season_axis, y=team_axis, color=hr_rate_color)
    .properties(width=600, height=600)
    .configure_legend(orient="top")
)

#### Most home runs per game?

In [26]:
# Row(s) with the highest home-runs-per-game value in the table.
hr_rate = src["hr/gm"]
src.loc[hr_rate.eq(hr_rate.max())]

Unnamed: 0,tm,#bat,batage,r/g,g,pa,ab,r,h,2b,3b,hr,rbi,sb,cs,bb,so,ba,obp,slg,ops,ops+,tb,gdp,hbp,sh,sf,ibb,lob,season,hr/gm,h/gm,bb/gm,sb/gm
67,Los Angeles Dodgers,38.0,28.0,5.82,60.0,2316.0,2042.0,349.0,523.0,97.0,6.0,118.0,327.0,29.0,8.0,228.0,471.0,0.256,0.338,0.483,0.821,119.0,986.0,46.0,30.0,3.0,12.0,7.0,401.0,2020,1.97,8.72,3.8,0.48


#### Most walks per game?

In [27]:
# Row(s) with the highest walks-per-game value in the table.
walk_rate = src["bb/gm"]
src.loc[walk_rate.eq(walk_rate.max())]

Unnamed: 0,tm,#bat,batage,r/g,g,pa,ab,r,h,2b,3b,hr,rbi,sb,cs,bb,so,ba,obp,slg,ops,ops+,tb,gdp,hbp,sh,sf,ibb,lob,season,hr/gm,h/gm,bb/gm,sb/gm
733,Seattle Mariners,37.0,31.3,5.6,162.0,6444.0,5497.0,907.0,1481.0,300.0,26.0,198.0,869.0,122.0,56.0,775.0,1073.0,0.269,0.361,0.442,0.803,108.0,2427.0,129.0,48.0,63.0,61.0,34.0,1247.0,2000,1.22,9.14,4.78,0.75


#### Most stolen bases per game?

In [28]:
# Row(s) with the highest stolen-bases-per-game value in the table.
steal_rate = src["sb/gm"]
src.loc[steal_rate.eq(steal_rate.max())]

Unnamed: 0,tm,#bat,batage,r/g,g,pa,ab,r,h,2b,3b,hr,rbi,sb,cs,bb,so,ba,obp,slg,ops,ops+,tb,gdp,hbp,sh,sf,ibb,lob,season,hr/gm,h/gm,bb/gm,sb/gm
1452,Oakland Athletics,38.0,29.5,4.26,161.0,6107.0,5353.0,686.0,1319.0,208.0,33.0,113.0,625.0,341.0,123.0,592.0,818.0,0.246,0.323,0.361,0.684,105.0,1932.0,91.0,45.0,58.0,58.0,45.0,1067.0,1976,0.7,8.19,3.68,2.12


#### Most hits per game?

In [25]:
# Row(s) with the highest hits-per-game value in the table.
hit_rate = src["h/gm"]
src.loc[hit_rate.eq(hit_rate.max())]

Unnamed: 0,tm,#bat,batage,r/g,g,pa,ab,r,h,2b,3b,hr,rbi,sb,cs,bb,so,ba,obp,slg,ops,ops+,tb,gdp,hbp,sh,sf,ibb,lob,season,hr/gm,h/gm,bb/gm,sb/gm
830,Boston Red Sox,46.0,27.9,5.25,162.0,6430.0,5781.0,851.0,1684.0,373.0,32.0,185.0,810.0,68.0,48.0,514.0,1044.0,0.291,0.352,0.463,0.815,110.0,2676.0,155.0,59.0,21.0,55.0,54.0,1221.0,1997,1.14,10.4,3.17,0.42
