# Dodgers Jumbo Jack
> This notebook processes game logs from Baseball Reference and calculates the share of games in which Dodgers pitchers recorded at least seven strikeouts — the threshold that triggers a promotion at Jack in the Box.

---

In [None]:
import os
import requests
import time
import pandas as pd
import jupyter_black
from tqdm.notebook import tqdm
from datetime import datetime
import altair as alt
import altair_stiles as altstiles

In [None]:
# Load the jupyter_black notebook extension.
jupyter_black.load()

# Raise pandas' display limits so wide/long tables and long cell values
# are shown without truncation.
for option_name, option_value in (
    ("display.max_columns", 100),
    ("display.max_rows", 1000),
    ("display.max_colwidth", None),
):
    pd.set_option(option_name, option_value)

# Register the "stiles" Altair theme, then make it the active theme.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

---

In [None]:
# Baseball Reference team game-log URL for LAD (t=p); the {year}
# placeholder is filled in once per season.
base_url = (
    "https://www.baseball-reference.com/teams/tgl.cgi"
    "?team=LAD&t=p&year={year}"
)

# Seasons to fetch: 2000 through the current calendar year, inclusive
# (range's stop is exclusive, hence the + 1).
first_season = 2000
years = range(first_season, datetime.now().year + 1)

In [None]:
# Collect one trimmed game-log frame per season. A season that fails to
# download or parse is reported and skipped rather than aborting the run.
keep_columns = ["date", "home_away", "so", "year"]
frames = []
for yr in tqdm(years):
    season_url = base_url.format(year=yr)
    try:
        # First table on the page is taken as the game log.
        raw_log = pd.read_html(season_url)[0]
        # Drop the top level of the two-level header and lower-case the rest.
        raw_log.columns = raw_log.columns.droplevel(0).str.lower()
        season_log = raw_log.rename(
            columns={"unnamed: 3_level_1": "home_away"}
        ).assign(year=yr)  # tag every row with its season
        frames.append(season_log[keep_columns])
    except Exception as e:
        print(f"Failed on {yr}: {e}")
    # Pause between requests (runs whether or not the season succeeded).
    time.sleep(5)

In [None]:
# Stack every season into one table, then keep only real game rows:
#   - so != "SO" drops rows whose strikeout cell is the literal header text
#     (presumably header rows repeated inside the scraped table body)
#   - ~date.isnull() drops rows that have no date
# The strikeout column is converted to int via .assign(), which returns a new,
# independent DataFrame. Assigning a column in place on the result of .query()
# would instead write to a frame pandas has flagged as a copy of the concat
# result, raising SettingWithCopyWarning here and on later column additions.
all_years = (
    pd.concat(frames, ignore_index=True)
    .query('so != "SO" and ~date.isnull()')
    .assign(so=lambda d: d["so"].astype(int))
)

In [None]:
# Flag games with at least seven strikeouts (the promotion threshold).
all_years["jumbo_jack"] = all_years["so"].ge(7)

In [None]:
alt.Chart(all_years).mark_bar(binSpacing=0).encode(
    x=alt.X(
        "so:Q",
        axis=alt.Axis(format=".0f", values=[1, 7, 15, 21]),
        bin=alt.Bin(maxbins=20, nice=False),
        title="Strikeouts",
    ),
    y=alt.Y("count()", title="Games"),
).properties(width=200, height=100).facet("year:O", columns=6)

In [None]:
# The mean of a boolean column is the fraction of True values, so the
# per-year mean of jumbo_jack is the share of games that hit the threshold.
share_by_year = all_years.groupby("year")["jumbo_jack"].mean()

# Express the share as a percentage rounded to one decimal place and
# return to a flat two-column table (year, pct_jumbo_jack).
season_share = (
    share_by_year.mul(100).round(1).rename("pct_jumbo_jack").reset_index()
)
season_share

In [None]:
# Same percentage, computed the long way: count games and qualifying
# games per year, then divide.
yearly_counts = all_years.groupby("year").agg(
    n_games=("jumbo_jack", "size"),  # row count per year (size counts NaNs too)
    n_true=("jumbo_jack", "sum"),  # True counts as 1, so this is the number of True rows
)

# Percentage of games that qualified, rounded to one decimal place.
yearly_counts["pct_jumbo_jack"] = (
    yearly_counts["n_true"].div(yearly_counts["n_games"]).mul(100).round(1)
)

# Move "year" from the index back to an ordinary column.
season_summary = yearly_counts.reset_index()

In [None]:
# Notes: The 2020 season was shortened by COVID.
# The 2011 season only had 161 games because of a late-season rainout that wasn't rescheduled.
# Bare expression: the notebook renders the per-season summary table as this cell's output.
season_summary