# Dodgers regular-season game hitting: 2010-present, via [stathead](https://stathead.com/baseball/team-batting-game-finder.cgi?request=1&max_wind_speed=90&team_game_min=1&comp_type=reg&max_temperature=120&order_by=date&match=team_game&team_id=LAD&year_max=2022&team_game_max=165&order_by_asc=0&min_temperature=0&min_wind_speed=0&year_min=2010&offset=1800)

In [1]:
# Load the lab_black IPython extension for this notebook session.
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import numpy as np

In [3]:
# Register the theme supplied by altair_stiles under the name "stiles",
# then enable it as the active Altair theme.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('grid')

In [4]:
# Let pandas print up to 1000 columns and 1000 rows, and switch off Altair's
# max-rows check on chart data.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [5]:
# Current date as an ISO "YYYY-MM-DD" string.
today = pd.to_datetime("today").date().isoformat()

---

## Read data 

In [6]:
# Load the raw Stathead game export and lower-case every column header so
# later cells can refer to columns by lower-case name.
src = pd.read_csv("data/raw/dodgers-games-stathead-2010-present.csv").rename(
    columns=str.lower
)

#### Split columns and clean up

In [7]:
# Break the space-separated "result" text into win/loss flag, final score and
# an optional trailing annotation. A plain expand=True split returns as many
# columns as the longest row has tokens, so the three-name assignment would
# raise whenever no row carries a third token (or any row carries a fourth).
# n=2 caps the split at three pieces and reindex() pads a missing piece with
# NaN, so exactly three columns are always produced.
result_parts = (
    src["result"].str.split(" ", n=2, expand=True).reindex(columns=[0, 1, 2])
)
src[["win_loss", "score", "extrainning"]] = result_parts
# The score text is "<runs scored>-<runs allowed>".
src[["runs_scored", "runs_allowed"]] = src["score"].str.split("-", expand=True)
# "unnamed: 3" is the header-less column that holds the home/away marker.
src["home_away"] = src["unnamed: 3"]

In [8]:
# Remove columns that are no longer needed: "result", "score" and
# "unnamed: 3" were unpacked into new columns; "pa", "r" and "rk" go with them.
src = src.drop(columns=["result", "score", "pa", "r", "unnamed: 3", "rk"])

#### Dates

In [9]:
# Separate the calendar date from any space-delimited annotation that follows
# it. A plain expand=True split yields only one column when no row has an
# annotation (and more than two if a row has several spaces), which breaks the
# two-name assignment. n=1 plus reindex() always gives exactly two columns,
# with NaN in "date_anno" where there is no annotation.
date_parts = src["date"].str.split(" ", n=1, expand=True).reindex(columns=[0, 1])
src[["date", "date_anno"]] = date_parts

In [10]:
# Convert the date strings to pandas datetimes so the .dt accessor works.
src = src.assign(date=pd.to_datetime(src["date"]))

In [11]:
# Derive calendar fields (year, month number, weekday name) from the game date.
src = src.assign(
    year=src["date"].dt.year,
    month=src["date"].dt.month,
    weekday=src["date"].dt.day_name(),
)

#### Home vs. away

In [12]:
# "@" marks an away game; any other value, including a blank, counts as home.
# A value that already reads "away" is kept as away.
src["home_away"] = (
    src["home_away"].isin(["@", "away"]).map({True: "away", False: "home"})
)

#### Data types

In [13]:
# The run totals came out of a string split, so cast both columns to integers.
src = src.astype({"runs_allowed": int, "runs_scored": int})

In [14]:
# Columns carried into the cleaned frame, in output order: game context
# first, then the batting columns.
_hitting_columns = (
    ["date", "year", "month", "weekday", "win_loss", "home_away", "opp"]
    + ["runs_scored", "runs_allowed"]
    + ["ab", "h", "1b", "2b", "3b", "hr", "rbi", "sb", "cs", "bb", "so"]
    + ["ba", "obp", "slg", "ops", "tb", "gidp", "hbp", "sh", "sf", "ibb"]
)
# Take an independent copy so later edits to df never touch src.
df = src.loc[:, _hitting_columns].copy()

In [15]:
# Seasons 2012 through 2021 inclusive (range() stops before 2022).
years = list(range(2012, 2022))
decade = df.loc[df["year"].isin(years)]

In [16]:
# Show the row(s) of `decade` where runs_scored equals its maximum.
decade.loc[decade["runs_scored"].eq(decade["runs_scored"].max())]

Unnamed: 0,date,year,month,weekday,win_loss,home_away,opp,runs_scored,runs_allowed,ab,h,1b,2b,3b,hr,rbi,sb,cs,bb,so,ba,obp,slg,ops,tb,gidp,hbp,sh,sf,ibb
200,2021-07-10,2021,7,Saturday,W,home,ARI,22,1,44,21,10,2,1,8,22,0,0,8,4,0.477,0.566,1.114,1.68,49,2,1,0,0,0


In [39]:
# Correlation matrix for the runs_scored and obp columns of `decade`.
decade.loc[:, ["runs_scored", "obp"]].corr()

Unnamed: 0,runs_scored,obp
runs_scored,1.0,0.733863
obp,0.733863,1.0


---

## Export

In [18]:
# Write the full cleaned game table to CSV, leaving out the DataFrame index.
df.to_csv("data/processed/dodgers_games.csv", index=False)

In [19]:
# Write the `decade` subset to its own CSV, leaving out the DataFrame index.
decade.to_csv("data/processed/dodgers_games_2012-2021.csv", index=False)