# LA Dodgers standings - Sketches
> This notebook analyzes the team's current standings table from [Baseball Reference](https://www.baseball-reference.com/teams/LAD/2024-schedule-scores.shtml) and compares it with historical records.

---

#### Import Python tools and Jupyter config

In [132]:
import json

import altair as alt
import altair_stiles as altstiles
import jupyter_black
import pandas as pd
import requests
import us
from IPython.display import Image, display
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [133]:
# Notebook-wide display settings: show wide/long frames in full and never
# truncate cell text when previewing data.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Load the jupyter_black extension for this notebook session.
jupyter_black.load()

# Altair: lift the row cap on chart data, then register and switch to the
# house "stiles" theme. The enable() call stays last so its repr is the
# cell's output.
alt.data_transformers.disable_max_rows()
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

---

## Fetch

#### Read game-by-game standings from 1958 to present

In [134]:
# Remote parquet file holding the game-by-game standings (1958 to present).
standings_url = (
    "https://stilesdata.com/dodgers/data/standings/dodgers_standings_1958_present.parquet"
)
src = pd.read_parquet(standings_url)

In [135]:
# Number of games played so far in the most recent season in the data.
#
# The season is derived from the data instead of being hard-coded as '2024',
# so this cell keeps working (and keeps meaning "the current season") once
# rows for a later season are appended to the source file.
# `year` is compared as a string because the original filter matched it
# against the string '2024'; four-digit year strings sort chronologically.
season_labels = src["year"].astype(str)
current_season = season_labels.max()
games_so_far = int(src.loc[season_labels == current_season, "gm"].max())

#### Calculate final wins in each season

In [136]:
# Largest value of the running `wins` column within each season, indexed by
# year. For completed seasons this is the end-of-season win total; for the
# season still in progress it is only the wins accumulated to date.
final_wins = src.groupby("year")["wins"].agg("max")

#### Merge the final wins back to the main dataframe

In [137]:
# Attach each season's win total to every game row of that season, as a new
# `final_wins` column keyed on `year`.
season_totals = final_wins.rename("final_wins")
df = pd.merge(src, season_totals, how="inner", on="year")

In [138]:
# Preview the first five rows to confirm `final_wins` was merged in.
df.head(n=5)

Unnamed: 0,gm,game_date,home_away,opp,result,r,ra,record,rank,gb,time,time_minutes,wins,losses,win_pct,day_night,attendance,game_day,year,final_wins
0,21,2024-04-17,home,WSN,L,0,2,12-9,1,1.0,2:16:00,136,12,9,0.57,D,44428,Wednesday,2024,12
1,20,2024-04-16,home,WSN,W,6,2,12-8,1,1.0,2:26:00,146,12,8,0.6,N,52718,Tuesday,2024,12
2,19,2024-04-15,home,WSN,L,4,6,11-8,1,1.0,2:36:00,156,11,8,0.58,N,42677,Monday,2024,12
3,18,2024-04-14,home,SDP,L,3,6,11-7,1,2.0,2:58:00,178,11,7,0.61,D,49432,Sunday,2024,12
4,17,2024-04-13,home,SDP,W,5,2,11-6,1,3.0,2:36:00,156,11,6,0.65,N,44582,Saturday,2024,12


#### How does the record at this point in the season compare historically?

In [139]:
# One row per season: the team's record after the same number of games the
# current season has reached, alongside that season's win total.
snapshot_columns = ["gm", "year", "wins", "losses", "win_pct", "final_wins"]
at_same_game = df["gm"] == games_so_far
wins_today_df = df.loc[at_same_game, snapshot_columns]

In [140]:
# First five rows of the same-game snapshot.
wins_today_df.head(n=5)

Unnamed: 0,gm,year,wins,losses,win_pct,final_wins
0,21,2024,12,9,0.57,12
162,21,2023,10,11,0.48,100
324,21,2022,14,7,0.67,111
486,21,2021,15,6,0.71,106
546,21,2020,14,7,0.67,43


In [141]:
# Last five rows of the same-game snapshot.
wins_today_df.tail(n=5)

Unnamed: 0,gm,year,wins,losses,win_pct,final_wins
9822,21,1962,13,8,0.62,102
9976,21,1961,11,10,0.52,89
10130,21,1960,10,11,0.48,82
10287,21,1959,12,9,0.57,88
10440,21,1958,9,12,0.43,71
