# [IEMCow](https://mesonet.agron.iastate.edu/request/gis/watchwarn.phtml): Past warnings, by zone

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import numpy as np
from datetime import date
import glob
from pathlib import Path

In [3]:
# Make the altair_stiles theme available under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme enabled here is "grid", not the "stiles" theme
# registered on the line above — confirm which one is intended.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Let pandas print very wide / very long frames without truncating them.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 10000)
# Turn off Altair's row limit on chart data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [5]:
# Current date as a "YYYY-MM-DD" string.
today = pd.to_datetime("today").strftime("%Y-%m-%d")
# Zero-padded "MM-DD" part of that string; compared later (as text) against
# each row's issue month-day.
today_month_day = today[5:]

---

## Get csvs from warnings archive

In [6]:
# Root folder of the raw archive; CSVs sit one level down, in sub-folders
# whose names start with "20".
folder = Path("data/big/iemcow_raw/")
# Path.glob returns a one-shot generator: once the read cell has consumed it,
# re-running that cell alone would find nothing to concatenate. Materialise
# the matches as a sorted list so the cell can be re-run and the files are
# always read in the same order.
csvs = sorted(folder.glob("20*/*.csv"))

In [7]:
# Read every archive CSV and stack them into a single frame. Each file keeps
# its own row index, so index labels repeat across files.
src_df = pd.concat(pd.read_csv(path, low_memory=False) for path in csvs)

In [9]:
# Normalise the column names to lower case so later cells can refer to them
# consistently (e.g. "issued", "phenom", "sig").
src_df.columns = src_df.columns.str.lower()

In [10]:
# Year as text: the first four characters of the "issued" timestamp string.
# NOTE(review): the year comes from "issued", while the month-day filter used
# later is derived from "init_iss" — confirm the two never straddle a year end.
src_df["year"] = src_df["issued"].str.slice(stop=4)

In [11]:
# Total number of rows across all archive files.
len(src_df)

6162065

---

#### Just the warning and watch data related to excessive heat

In [12]:
# Human-readable labels for the significance codes kept below.
# (Not referenced by any other cell visible in this notebook.)
sig = dict(W="Warning", A="Watch")

In [13]:
# Keep only rows whose phenomenon code is "EH".
heat_df = src_df.loc[src_df["phenom"] == "EH"]
# Of those, keep warnings ("W") and watches ("A"); copy so later column
# assignments do not touch the source frame.
df = heat_df.loc[heat_df["sig"].isin(["W", "A"])].copy()

#### Keep a subset of columns (TODO: decide on the best subset)

In [14]:
# Columns carried forward: timestamps, product identifiers, zone code, year.
keep_cols = [
    "issued",
    "expired",
    "init_iss",
    "init_exp",
    "phenom",
    "gtype",
    "sig",
    "etn",
    "status",
    "nws_ugc",
    "year",
]
# Independent copy restricted to those columns.
heat_warn_watch_df = df.loc[:, keep_cols].copy()

In [15]:
# Parse the initial-issuance timestamp text into real datetimes.
# NOTE(review): this reads "init_iss", not the row's own "issued" column, so
# every duration derived from it starts at first issuance — confirm that is
# intended for rows that continue or upgrade an earlier product.
issued_text = heat_warn_watch_df["init_iss"]
heat_warn_watch_df["datetime_issued"] = pd.to_datetime(
    issued_text, format="%Y-%m-%d %H:%M"
)

In [16]:
# Parse the row's "expired" timestamp text into real datetimes.
expired_text = heat_warn_watch_df["expired"]
heat_warn_watch_df["datetime_expired"] = pd.to_datetime(
    expired_text, format="%Y-%m-%d %H:%M"
)

In [17]:
# Elapsed time from (initial) issuance to expiry, as a timedelta.
heat_warn_watch_df["days_delta"] = heat_warn_watch_df["datetime_expired"].sub(
    heat_warn_watch_df["datetime_issued"]
)

In [18]:
# Express the elapsed time as fractional days, rounded to two decimals.
seconds_per_day = 24 * 60 * 60
elapsed_seconds = heat_warn_watch_df["days_delta"].dt.total_seconds()
heat_warn_watch_df["days"] = elapsed_seconds.div(seconds_per_day).round(2)

In [19]:
# Zero-padded "MM-DD" labels for the issue and expiry datetimes, so rows from
# different years can be compared by position in the calendar.
month_day_fmt = "%m-%d"
issued_dt = heat_warn_watch_df["datetime_issued"].dt
expired_dt = heat_warn_watch_df["datetime_expired"].dt
heat_warn_watch_df["month_day_issued"] = issued_dt.strftime(month_day_fmt)
heat_warn_watch_df["month_day_expired"] = expired_dt.strftime(month_day_fmt)

In [20]:
# Rows issued on or before today's month-day, in any year. The comparison is
# on "MM-DD" text, which orders correctly because both sides are zero-padded.
issued_by_today = heat_warn_watch_df["month_day_issued"].le(today_month_day)
ext_heat_warn = heat_warn_watch_df.loc[issued_by_today].copy()

In [21]:
# Peek at the five rows with the latest issuance datetime.
ext_heat_warn.sort_values("datetime_issued", ascending=False).head()

Unnamed: 0,issued,expired,init_iss,init_exp,phenom,gtype,sig,etn,status,nws_ugc,year,datetime_issued,datetime_expired,days_delta,days,month_day_issued,month_day_expired
76179,2022-09-06 19:00,2022-09-08 06:00,2022-09-05 14:52,2022-09-08 06:00,EH,C,W,2,CON,CAZ080,2022,2022-09-05 14:52:00,2022-09-08 06:00:00,2 days 15:08:00,2.63,09-05,09-08
86179,2022-09-06 18:00,2022-09-08 06:00,2022-09-04 21:14,2022-09-08 06:00,EH,C,W,2,CON,CAZ107,2022,2022-09-04 21:14:00,2022-09-08 06:00:00,3 days 08:46:00,3.37,09-04,09-08
193542,2022-09-06 18:00,2022-09-08 06:00,2022-09-04 21:14,2022-09-08 06:00,EH,C,W,2,CON,CAZ108,2022,2022-09-04 21:14:00,2022-09-08 06:00:00,3 days 08:46:00,3.37,09-04,09-08
46309,2022-09-06 18:00,2022-09-07 06:00,2022-09-04 21:14,2022-09-07 06:00,EH,C,W,2,CON,CAZ110,2022,2022-09-04 21:14:00,2022-09-07 06:00:00,2 days 08:46:00,2.37,09-04,09-07
78610,2022-09-06 19:00,2022-09-05 14:52,2022-09-04 20:27,2022-09-08 06:00,EH,C,A,2,UPG,CAZ080,2022,2022-09-04 20:27:00,2022-09-05 14:52:00,0 days 18:25:00,0.77,09-04,09-05


In [22]:
# Number of rows left after the month-day filter.
len(ext_heat_warn)

38720

---

## Aggregate

#### Which zones had the most heat warnings and watches (count and total days) by this point in the calendar year, all years combined?

In [23]:
# Per zone, across all years: number of rows ("count") and summed duration in
# days, largest total first.
# The aggregation is named by string ("sum"), matching the other groupby cells
# in this notebook; passing the Python builtin `sum` triggers a deprecation
# warning in recent pandas. Only "phenom" needs renaming — "days" keeps its
# name.
heat_grouped_place = (
    ext_heat_warn.groupby(["nws_ugc"])
    .agg({"phenom": "count", "days": "sum"})
    .sort_values("days", ascending=False)
    .reset_index()
    .rename(columns={"phenom": "count"})
)

In [24]:
# Build the join key by dropping the third character of the zone code,
# e.g. "AZZ002" -> "AZ002".
place_ugc = heat_grouped_place["nws_ugc"]
heat_grouped_place["state_zone"] = place_ugc.str.slice(stop=2) + place_ugc.str.slice(
    start=3
)

In [25]:
# First five zones, i.e. those with the largest summed days.
heat_grouped_place.head()

Unnamed: 0,nws_ugc,count,days,state_zone
0,AZZ002,123,308.91,AZ002
1,CAZ527,123,308.91,CA527
2,AZZ036,119,305.94,AZ036
3,NVZ021,119,305.94,NV021
4,NVZ017,117,305.83,NV017


#### Which zones had the most heat warnings and watches in each year, through today's date?

In [26]:
# Per year and zone: number of rows ("count") and summed duration in days,
# largest total first.
heat_grouped_year_place = (
    ext_heat_warn.groupby(["year", "nws_ugc"])
    .agg(count=("phenom", "count"), days=("days", "sum"))
    .sort_values("days", ascending=False)
    .reset_index()
)

In [27]:
# Build the join key by dropping the third character of the zone code,
# e.g. "AZZ540" -> "AZ540".
year_place_ugc = heat_grouped_year_place["nws_ugc"]
heat_grouped_year_place["state_zone"] = year_place_ugc.str.slice(
    stop=2
) + year_place_ugc.str.slice(start=3)

In [28]:
# First five year/zone pairs, i.e. those with the largest summed days.
heat_grouped_year_place.head()

Unnamed: 0,year,nws_ugc,count,days,state_zone
0,2020,AZZ540,24,74.12,AZ540
1,2020,AZZ550,24,74.12,AZ550
2,2020,AZZ537,24,74.12,AZ537
3,2020,AZZ543,24,74.12,AZ543
4,2020,AZZ544,24,74.12,AZ544


#### Heat warnings and watches by year, through today's date

In [29]:
# Per year: number of rows ("count") and summed duration in days, newest year
# first.
heat_grouped_year = (
    ext_heat_warn.groupby(["year"])
    .agg(count=("phenom", "count"), days=("days", "sum"))
    .sort_values("year", ascending=False)
    .reset_index()
)

In [30]:
# Display the full per-year table (one row per year).
heat_grouped_year

Unnamed: 0,year,count,days
0,2022,4246,7953.49
1,2021,3160,7657.2
2,2020,2725,6675.39
3,2019,4233,7588.79
4,2018,2644,5057.78
5,2017,1718,4679.83
6,2016,2386,5138.97
7,2015,1268,1809.56
8,2014,324,467.26
9,2013,577,1395.21


---

## Merge

#### Just the geography

In [31]:
# Load the forecast-zone geometries; later cells use its "state",
# "state_zone", "pop_share" and "geometry" columns.
zones_gdf = gpd.read_file("data/big/public_forecast_zones_pop.geojson")

In [32]:
# Lower-case the column names to match the convention used for the warnings.
zones_gdf.columns = zones_gdf.columns.str.lower()

In [33]:
# Give the two suffixed name columns clearer labels.
zones_gdf = zones_gdf.rename(columns={"name_1": "name", "name_2": "name_full"})

#### Merge on `state_zone`

In [34]:
# Attach zone attributes and geometry to the per-year, per-zone totals.
# No `how=` is given, so only zone codes present on both sides are kept.
heat_gdf = gpd.GeoDataFrame(
    heat_grouped_year_place.merge(zones_gdf, on="state_zone")
)

In [35]:
# Replace missing counts with 0.
# NOTE(review): heat_gdf is built with a merge that passes no `how=` (pandas'
# default inner join), so "count" should have no missing values at this point
# — confirm whether a join that keeps every zone was intended.
heat_gdf["count"] = heat_gdf["count"].fillna(0)

In [36]:
# Two-letter codes kept by the state filter: 48 states plus DC (no AK, HI or
# territories). Order is preserved from the original list.
states = """
AL AR KS KY TN TX CO ID MA MT
AZ WA MN WY NY CT NJ VA MD DE
GA IA IL IN WV OR SD UT MI MO
MS NC ND NE VT NH NV OH OK PA
SC WI LA FL ME CA NM RI DC
""".split()

In [37]:
# Restrict both the heat table and the zone geometries to the listed states.
heat_gdf = heat_gdf.loc[heat_gdf["state"].isin(states)]
zones_gdf = zones_gdf.loc[zones_gdf["state"].isin(states)]

#### Sum the population by place

In [38]:
# A zone can appear on several rows per year (one per county piece it
# overlaps), each with its own "pop_share". Collapse those to one row per
# year/zone by summing "pop_share"; "count", "days", "name" and "state" are
# included in the keys so they carry through to the result.
# Note: this rebinds `df`, which earlier held the heat warning/watch subset.
group_keys = ["year", "state_zone", "count", "days", "name", "state"]
df = heat_gdf.groupby(group_keys)["pop_share"].sum().reset_index()

---

## Export

In [39]:
# Write the per-year, per-zone heat table with its geometry as GeoJSON.
heat_export_cols = [
    "year",
    "state_zone",
    "count",
    "pop_share",
    "days",
    "name",
    "state",
    "geometry",
]
heat_gdf[heat_export_cols].to_file(
    "data/raw/excessive_heat_warnings_2022.geojson", driver="GeoJSON"
)

In [40]:
# Write the (state-filtered) zone geometries with a minimal set of attributes.
zone_export_cols = ["state", "name", "state_zone", "geometry"]
zones_gdf[zone_export_cols].to_file(
    "data/raw/public_forecast_zones.geojson", driver="GeoJSON"
)

In [41]:
# Sanity check: the five merged rows with the highest counts. The same zone
# can appear more than once, one row per county piece.
heat_gdf.sort_values("count", ascending=False).head()

Unnamed: 0,year,nws_ugc,count,days,state_zone,state,cwa,time_zone,fe_area,zone,name,lon,lat,shortname,zone_area_sqm,name_full,state_name,state_fips,cnty_fips,fips,population,shape_area,county_area,area,area_share,pop_share,geometry
0,2020,AZZ540,24,74.12,AZ540,AZ,PSR,m,sc,540,Buckeye/Avondale,-112.4004,33.3912,Buckeye/Avondale,54.57,Maricopa County,Arizona,4,13,4013,4477918,2.314605,9226.0,587.39,6.3667,285096.0,"POLYGON ((-1504063.695 -310230.595, -1496691.3..."
12,2020,AZZ550,24,74.12,AZ550,AZ,PSR,m,sc,550,South Mountain/Ahwatukee,-112.0449,33.2829,South Mountain/Ahwatukee,9.93,Maricopa County,Arizona,4,13,4013,4477918,2.314605,9226.0,106.8,1.1576,51836.0,"POLYGON ((-1482686.257 -354885.304, -1482873.3..."
1,2020,AZZ540,24,74.12,AZ540,AZ,PSR,m,sc,540,Buckeye/Avondale,-112.4004,33.3912,Buckeye/Avondale,54.57,Pinal County,Arizona,4,21,4021,454915,1.341683,5374.25,0.05,0.0009,4.0,"MULTIPOLYGON (((-1488444.735 -348321.856, -148..."
67,2020,AZZ551,24,74.12,AZ551,AZ,PSR,m,sc,551,Southeast Valley/Queen Creek,-111.7353,33.2616,Southeast Valley/Queen Creek,13.61,Pinal County,Arizona,4,21,4021,454915,1.341683,5374.25,0.07,0.0013,6.0,"MULTIPOLYGON (((-1435955.497 -356361.513, -143..."
66,2020,AZZ551,24,74.12,AZ551,AZ,PSR,m,sc,551,Southeast Valley/Queen Creek,-111.7353,33.2616,Southeast Valley/Queen Creek,13.61,Maricopa County,Arizona,4,13,4013,4477918,2.314605,9226.0,146.42,1.587,71065.0,"POLYGON ((-1436189.682 -357725.623, -1436675.2..."


In [42]:
# Simplify the exported heat GeoJSON with the mapshaper CLI (must be on PATH)
# and write the result under data/big/.
# NOTE(review): `percentage=.1` presumably keeps 10% of removable vertices —
# confirm against the mapshaper docs.
!mapshaper -i data/raw/excessive_heat_warnings_2022.geojson  -simplify percentage=.1 no-repair  -o data/big/excessive_heat_warnings_2022_simple.geojson



In [43]:
# Simplify the exported zone GeoJSON with the mapshaper CLI.
# NOTE(review): the recorded output shows three numbered files were written
# (public_forecast_zones1/2/3.geojson) rather than the single path given to
# -o — confirm which file downstream consumers should read.
!mapshaper -i data/raw/public_forecast_zones.geojson  -simplify percentage=.1 no-repair  -o data/big/public_forecast_zones.geojson

[o] Wrote data/big/public_forecast_zones1.geojson
[o] Wrote data/big/public_forecast_zones2.geojson
[o] Wrote data/big/public_forecast_zones3.geojson


In [44]:
# Save the tabular summary without the row index. `df` at this point is the
# grouped "pop_share" table from the "Sum the population by place" step, not
# the heat subset that used the same name earlier in the notebook.
df.to_csv("data/processed/excessive_heat_warnings.csv", index=False)