# Dodgers Data Bot
> This notebook is a sketchpad for data collected in this project. Nothing to see here! 

---

In [1]:
import os
import time

import altair as alt
import jupyter_black
import numpy as np
import pandas as pd
import requests
from IPython.display import Image
from tqdm.notebook import tqdm

In [2]:
# Notebook setup: load the jupyter_black extension and relax display limits.
jupyter_black.load()

# Let pandas show up to 100 columns / 1000 rows and never truncate cell text.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Turn off Altair's max-rows check; left as the last expression so the
# cell still displays the registry it returns.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# AWS profile name taken from the environment; None when the variable is unset.
# NOTE(review): not used in the cells visible here — presumably consumed later.
aws_profile_env_var = "AWS_PERSONAL_PROFILE"
profile_name = os.environ.get(aws_profile_env_var)

---

## Fetch

#### Read wins, losses

In [4]:
# Load the per-game standings history and derive each game's run differential.
standings_url = (
    "https://stilesdata.com/dodgers/data/standings/dodgers_standings_1958_present.json"
)
history_df = pd.read_json(standings_url).assign(
    # game_date is parsed as epoch milliseconds
    game_date=lambda games: pd.to_datetime(games["game_date"], unit="ms"),
    # r minus ra (presumably runs scored minus runs allowed — confirm against the feed)
    diff=lambda games: games["r"] - games["ra"],
)

# Narrow, independent copy holding only the columns the charts below use.
slim_columns = ["game_date", "home_away", "opp", "r", "ra", "diff"]
history_df_slim = history_df[slim_columns].copy()

In [18]:
# Bin run differentials to whole runs. A vectorized floor + integer cast
# replaces the per-row Python lambda and yields the same int64 column.
# (numpy is imported as `np` in the notebook's import cell.)
history_df_slim["diff_bin"] = np.floor(history_df_slim["diff"]).astype("int64")

# Count games per differential and attach a fixed bar color:
# red for negative differentials, blue for zero or positive.
binned = (
    history_df_slim.groupby("diff_bin")
    .size()
    .reset_index(name="count")
    .assign(color=lambda df: np.where(df["diff_bin"] < 0, "#EF3E42", "#005A9C"))
)

# Histogram of game counts per run differential. `scale=None` makes Altair
# use the hex strings in the `color` column directly rather than mapping them.
chart = (
    alt.Chart(binned)
    .mark_bar()
    .encode(
        x=alt.X("diff_bin:O", title="Losses ← Run differential → Wins"),
        y=alt.Y("count:Q", title="Game count"),
        color=alt.Color("color:N", scale=None, legend=None),
        tooltip=["diff_bin", "count"],
    )
    .properties(
        width=800,
        height=450,
        title="Distribution of Dodgers run differentials (1958–present)",
    )
)

chart

In [20]:
# Color each game's bar by outcome: blue when diff > 0, red otherwise.
predicate = alt.datum.diff > 0
color = alt.when(predicate).then(alt.value("#005A9C")).otherwise(alt.value("#EF3E42"))

# Layer 1: one thin bar per game, in date order.
game_bars = (
    alt.Chart(history_df_slim.sort_values("game_date"))
    .mark_bar(size=0.5)
    .encode(
        x="game_date:T",
        y="diff:Q",
        color=color,
        tooltip=["game_date", "opp", "r", "ra", "diff"],
    )
    .properties(title="Dodgers Run Differential by Game (1958 - Present)")
)

# Layer 2: black dots marking games with a differential of -16 or lower.
lopsided_losses = history_df_slim[history_df_slim["diff"] <= -16]
lopsided_marks = (
    alt.Chart(lopsided_losses)
    .mark_circle(size=50, color="black")
    .encode(x="game_date:T", y="diff:Q")
)

# Overlay the two layers and size the combined chart for display.
chart = game_bars + lopsided_marks
chart.properties(width=800, height=450)