# N.L. West 2022 race analysis

By Ben Welsh

## Import Python tools

In [1]:
import pandas as pd
import altair as alt

In [2]:
import requests
from bs4 import BeautifulSoup

## Scrape

Pull the win log for each team from baseball-reference.com.

In [26]:
def parse_win_log(team):
    """Scrape one team's 2022 schedule page and return its game-by-game win log.

    Args:
        team: Team code interpolated into the Baseball Reference URL
            (e.g. "LAD").

    Returns:
        A list of dicts, one per table row that has both a date cell and a
        win/loss cell, in the order the rows appear on the page. Each dict
        has the keys ``team``, ``date`` (the date cell's ``csk`` attribute),
        ``result`` (the win/loss cell text with any "-wo" suffix removed)
        and ``cumulative`` (the running number of "W" results so far).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page contains no element with the
            ``stats_table`` class.
    """
    url = f"https://www.baseball-reference.com/teams/{team}/2022-schedule-scores.shtml"
    # Without a timeout a stalled connection would hang the notebook forever.
    r = requests.get(url, timeout=30)
    # Fail loudly on 404/429/etc. instead of trying to parse an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    table = soup.find(class_="stats_table")
    if table is None:
        raise ValueError(f"No stats_table found for team {team!r} at {url}")

    cumulative = 0
    result_list = []
    for row in table.find_all("tr"):
        # Rows without a date cell (e.g. header rows made only of <th>) or
        # without a usable sort key carry no game and are skipped.
        date_cell = row.find('td', attrs={"data-stat": "date_game"})
        date_string = date_cell.get('csk') if date_cell is not None else None
        if not date_string:
            continue

        result_cell = row.find('td', attrs={"data-stat": "win_loss_result"})
        if result_cell is None:
            continue

        # "-wo" is stripped so that e.g. "W-wo" counts as a plain "W"
        # (presumably a walk-off marker -- confirm against the site).
        outcome = result_cell.text.strip().replace("-wo", "")
        if outcome == "W":
            cumulative += 1
        result_list.append(
            dict(
                team=team,
                date=date_string,
                result=outcome,
                cumulative=cumulative,
            )
        )
    return result_list

In [27]:
# Team codes to scrape; each one is interpolated into the schedule URL.
# NOTE(review): Baseball Reference appears to use "SDP" and "SFG" for
# San Diego and San Francisco -- confirm that "SD" and "SF" resolve.
team_list = ["LAD", "SD", "SF", "ARI", "COL"]

In [28]:
# Gather every team's win log into one flat list of row dicts.
result_list = []
for team in team_list:
    result_list.extend(parse_win_log(team))

## Merge

In [29]:
# One row per scraped game; columns come from the keys of the row dicts.
df = pd.DataFrame(data=result_list)

## Sketch

In [39]:
# Step chart of cumulative wins over time, one colored line per team.
(
    alt.Chart(df)
    .mark_line(interpolate="step-after")
    .encode(
        x=alt.X("date:T", title=None),
        y=alt.Y("cumulative:Q", title="Wins"),
        color=alt.Color("team:N"),
    )
    .properties(
        width=500,
        height=700,
        title="The NL West 2022 race",
    )
)

## Export

Pivot the data into the wide format Datawrapper expects: one row per date and one column per team.

In [36]:
# Wide table: one row per date, one column per team, values = wins to date.
# pivot_table defaults to aggfunc="mean", which would average the running
# totals if a team has more than one row for a date (e.g. a doubleheader);
# "max" keeps the end-of-day total instead.
# ffill carries the last total across dates a team did not play, fillna(0)
# covers dates before a team's first game, and the cast restores whole
# numbers after the NaN-induced float conversion.
pivot = (
    df.pivot_table(index="date", columns="team", values="cumulative", aggfunc="max")
    .ffill()
    .fillna(0)
    .astype(int)
)

In [37]:
# Write the pivoted table next to the notebook for upload to the charting tool.
pivot.to_csv(path_or_buf="./cumulative-wins.csv")