# Alaska history

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import datetime as dt
import numpy as np
import altair as alt
import altair_stiles as altstiles



In [3]:
# Register the theme callable from altair_stiles under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme registered above is "stiles", but the one enabled
# here is "grid" — confirm "grid" is registered elsewhere (e.g. by
# altair_stiles on import) and is the theme actually wanted.
alt.themes.enable("grid")
# Turn off Altair's max-rows check so larger frames can be charted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [4]:
# Current local date as an ISO "YYYY-MM-DD" string.
today = dt.date.today().isoformat()

In [5]:
# Widen the notebook display: up to 100 columns, up to 1000 rows, and no
# truncation of long cell values.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

---

#### Read the raw data (TODO: add a link to the source CSV here, if possible)

In [6]:
# Load the raw daily stats export. "ID" is kept as text and
# "SitReportDate" is parsed into datetimes on read.
history_df = pd.read_csv(
    filepath_or_buffer="data/raw/Alaska Daily Stats - 2004 to Present (2).csv",
    dtype={"ID": str},
    parse_dates=["SitReportDate"],
)

In [7]:
# When several rows share a SitReportDate, keep only the last one.
history_df = history_df[
    ~history_df.duplicated(subset="SitReportDate", keep="last")
]

In [8]:
# Normalise every column label to lower case.
history_df = history_df.rename(columns=str.lower)

In [9]:
# Use the shorter "date" label for the report-date column.
history_df = history_df.rename(columns={"sitreportdate": "date"})

In [10]:
# Independent copy holding only the two columns used below.
history_slim = history_df.loc[:, ["date", "totalacres"]].copy()

In [11]:
# Total the acreage per report date, round to two decimals, order the rows
# chronologically and expose the total as "acres".
dates = (
    history_slim.groupby(["date"])
    # Use the string alias "sum": passing the Python builtin `sum` to .agg()
    # is deprecated in recent pandas and emits a FutureWarning.
    .agg({"totalacres": "sum"})
    .round(2)
    .reset_index()
    .sort_values("date", ascending=True)
).rename(columns={"totalacres": "acres"})

#### Add in missing dates

In [12]:
# One timestamp per calendar day from 1993-01-01 through today.
# NOTE(review): the raw file name says "2004 to Present"; days before the
# first report will have no acres to merge — confirm the 1993 start is intended.
fake_dates = pd.date_range(start="1993-01-01", end=today, freq="D")

In [13]:
# Single-column frame ("date") holding the full daily calendar.
fake_dates_df = fake_dates.to_frame(index=False, name="date")

#### Merge the acres to all the dates

In [14]:
# Left join keeps every calendar day; days without a report get NaN acres.
merged = fake_dates_df.merge(dates, how="left", on="date")

In [15]:
# Calendar year of each row, used later for grouping and as chart colour.
merged = merged.assign(year=pd.to_datetime(merged["date"]).dt.year)

In [16]:
# Zero-padded "MM-DD" label so the same day lines up across years.
merged = merged.assign(
    month_day=pd.to_datetime(merged["date"]).dt.strftime("%m-%d")
)

#### Forward fill missing values - two different ways

In [17]:
merged["filled"] = (
    merged.groupby("year")["acres"]
    .ffill()  # carry the latest reported value forward, within each year only
    .fillna(0)  # days before a year's first report count as zero
    .astype(int)  # whole numbers; any fractional part is dropped
)
# Alternative forward fill (leaves the leading NaNs and the float dtype as-is):
# merged["filled"] = merged.groupby("year")["acres"].transform(lambda x: x.ffill())

---

#### Chart the summer months

In [18]:
# Rows strictly between 04-01 and 09-30 (both endpoints excluded). The
# zero-padded "MM-DD" strings compare correctly as plain text.
chart_df = merged[merged["month_day"].between("04-01", "09-30", inclusive="neither")]

In [19]:
# Stepped line chart of the forward-filled acres, one line per year.
alt.Chart(chart_df).mark_line(interpolate="step-after").encode(
    # NOTE(review): month_day holds "MM-DD" strings but is typed as temporal
    # (":T"); how a year-less string is parsed is left to the renderer — confirm
    # the axis shows the intended dates.
    x=alt.X("month_day:T", axis=alt.Axis(tickCount=6), title=""),
    y=alt.Y("filled", title=" ", axis=alt.Axis(tickCount=6)),
    # NOTE(review): "category20" presumably offers 20 colours; if chart_df
    # covers more years than that, colours will repeat — confirm acceptable.
    color=alt.Color("year:O", scale=alt.Scale(scheme="category20")),
).properties(title="Cumulative acres burned, by year").configure_legend(
    symbolType="stroke"
)

In [20]:
# Reshape to wide form: one row per month_day, one column per year, cells
# holding the filled acres; month_day is returned to a regular column.
dw_df_wide = pd.pivot_table(
    chart_df,
    values="filled",
    index="month_day",
    columns="year",
).reset_index()

In [21]:
# Write the wide table to the processed-data folder without the row index.
dw_df_wide.to_csv(
    path_or_buf="data/processed/acres_burned_cum_alaska_year_day_dw.csv",
    index=False,
)