# MLB rain/weather

#### Load Python tools

In [1]:
%load_ext lab_black

In [6]:
import requests
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import zipfile
import glob
import os

In [7]:
# Register the altair_stiles theme under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme enabled here is "grid", not the "stiles" name
# registered on the line above — confirm that "grid" is the intended theme.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [8]:
# Raise pandas' display limits to 1000 columns / 1000 rows so frames shown
# in later cells are not truncated.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

---

## Get data from [baseball-reference](https://stathead.com/baseball/game_finder.cgi?request=1&match=basic&order_by_asc=0&order_by=HR&year_min=1901&year_max=2022&class=team&type=b&temperature_min=100&wind_speed_max=90)

In [9]:
# Directory holding the downloaded CSV exports, relative to the notebook's
# working directory.
path = r"data/raw/baseball/rain/"  # use your path

# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the row order of the concatenated frame is the same on every run and machine.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))

In [10]:
# Fail early with an actionable message: with an empty file list pd.concat
# raises a bare "No objects to concatenate", which hides the real cause.
if not all_files:
    raise FileNotFoundError(f"No CSV files found in {path!r}")

# Read every CSV export (first row is the header; no column becomes the index).
li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]

# Stack all files row-wise into one frame with a fresh 0..n-1 index.
src = pd.concat(li, axis=0, ignore_index=True)

In [11]:
# src = pd.read_csv("data/raw/baseball/temp/sportsref_download_100.csv")

In [12]:
# Lower-case every column label; the following cells refer to columns by
# their lower-case names.
src = src.set_axis(src.columns.str.lower(), axis="columns")

In [13]:
# Order rows by the "date" column, descending. The column has not been
# converted with pd.to_datetime yet at this point in the notebook.
src = src.sort_values(by=["date"], ascending=[False])

In [14]:
# Split the raw date text at the first "(" into the date part and whatever
# follows it (stored as "game_no").
# `n=1` caps the result at two pieces, and `reindex` guarantees both columns
# exist even when no value contains "(" — without it the two-column
# assignment raises "Columns must be same length as key".
date_parts = (
    src["date"]
    .astype(str)
    .str.split("(", n=1, expand=True)
    .reindex(columns=[0, 1])
)
# Drop any whitespace left in front of the "(" so the date text is clean.
src["date"] = date_parts[0].str.strip()
src["game_no"] = date_parts[1]

In [15]:
# Parse the date text into real timestamps.
src["date"] = pd.to_datetime(src["date"])
# Four-digit year as a string. strftime leaves missing dates (NaT) as missing
# values instead of turning them into the literal string "nan", so they do not
# form a bogus "nan" year when grouping by "year".
src["year"] = src["date"].dt.strftime("%Y")

---

#### Records per year

In [20]:
# One row per year with the number of non-null "prec" values in that year,
# exposed as the column "count".
# NOTE(review): the displayed counts are all even — presumably the source has
# one row per team per game; confirm whether games should be de-duplicated.
years = src.groupby("year")["prec"].count().reset_index(name="count")

In [21]:
# Display the per-year counts table.
years

Unnamed: 0,year,count
0,1950,2
1,1952,2
2,1953,4
3,1955,4
4,1956,2
5,1958,18
6,1961,10
7,1962,8
8,1963,2
9,1964,4


In [22]:
# A plain `rolling(5)` averages the previous five *rows*. Years with no rows
# are absent from `years`, so such a window can span more than five calendar
# years. Put the counts on a gap-free calendar before rolling.
# NOTE(review): absent years are treated as a count of 0 — confirm a missing
# year means "no matching games" rather than "data not downloaded".
year_numbers = pd.to_numeric(years["year"], errors="coerce")
has_year = year_numbers.notna()
five_year_mean = pd.Series(float("nan"), index=years.index, dtype="float64")
if has_year.any():
    observed_years = year_numbers[has_year].astype(int)
    calendar = range(observed_years.min(), observed_years.max() + 1)
    calendar_counts = pd.Series(
        years.loc[has_year, "count"].to_numpy(), index=observed_years.to_numpy()
    ).reindex(calendar, fill_value=0)
    # Keep only the years that actually appear in `years`, in the same order.
    five_year_mean.loc[has_year] = (
        calendar_counts.rolling(5).mean().reindex(observed_years.to_numpy()).to_numpy()
    )
years["five_year_mean"] = five_year_mean

In [23]:
base = alt.Chart(years).encode(x="year:O")

bar = base.mark_bar().encode(y="count:Q")

line = base.mark_line(color="red").encode(y="five_year_mean:Q")

(bar + line).properties(width=600)