In [29]:
import os

import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import pandas as pd
from scipy.integrate import simpson
from tqdm.notebook import tqdm

from lib import merge_in_geometry

In [2]:
# Switch matplotlib to its 'dark_background' style sheet; this applies to figures created after this call.
plt.style.use('dark_background')  # for cool points

In [22]:
# Configuration shared by the cells below: analysis years, data root, lookup tables and the cache location.
years = list(range(2017, 2023))  # 2017 through 2022 inclusive
root_dir = "data"
# state lookup table, indexed by the "state_fips_code" column
states = pd.read_csv(os.path.join(root_dir, "raw/states/state_codes.csv")).set_index("state_fips_code")
# county boundaries shapefile
county_boundaries: gpd.GeoDataFrame = gpd.read_file(os.path.join(root_dir, "raw/counties/cb_2018_us_county_500k.shp"))
# CSV cache of the yearly per-county outage integrals; derived from root_dir (not a second
# hard-coded "data") so that relocating the data directory moves the cache path with it
outage_integrals_path = os.path.join(root_dir, "processed", "outage", "yearly_county_outage_integrals.csv")

In [15]:
# Read each year's processed outage table and constrain OutageFraction to the range [0, 1].
data_by_year = {}
for year in years:
    print(year)
    yearly = pd.read_parquet(os.path.join(root_dir, f"processed/outage/{year}.parquet"))
    yearly["OutageFraction"] = yearly["OutageFraction"].clip(lower=0, upper=1)
    data_by_year[year] = yearly

2017
2018
2019
2020
2021
2022


In [16]:
# Sorted list of every county code that appears in the "CountyFIPS" index level of any year.
counties = sorted(
    {
        fips
        for yearly in data_by_year.values()
        for fips in yearly.index.get_level_values("CountyFIPS")
    }
)

In [26]:
# Time-integrated outage fraction per (year, county), cached on disk as CSV.
# Either branch leaves `df` with the columns: year, county, outage_integral, total_duration_h.
if os.path.exists(outage_integrals_path):
    # county codes are read as strings (presumably to keep leading zeros -- confirm)
    df = pd.read_csv(outage_integrals_path, index_col=False, dtype={"county": str})

else:
    # The hourly grid and its time axis depend only on the year, so build them once per year
    # instead of once per (county, year) pair.
    # NOTE: with date-only endpoints the grid stops at 00:00 on 31 October; the remaining
    # hours of that day are outside the integration window.
    hourly_index_by_year = {}
    hours_axis_by_year = {}
    for year in years:
        complete_index = pd.date_range(f"{year}-04-01", f"{year}-10-31", freq="1H")
        hourly_index_by_year[year] = complete_index
        # Hours elapsed since the first grid point. Dividing by a Timedelta avoids casting
        # timestamps to integers, which would depend on the storage resolution of the
        # datetimes and on the platform's default integer width.
        hours_axis_by_year[year] = ((complete_index - complete_index[0]) / pd.Timedelta(hours=1)).to_numpy()

    integrals = []
    for county_code in tqdm(counties):

        # whole timeseries April-October for single county
        for year in years:
            year_outages = data_by_year[year]
            try:
                county_outages = year_outages.loc(axis=0)[:, county_code]
            except KeyError:
                # this county has no outage records in this year
                continue

            # put the records on the complete hourly grid; hours without a record count as zero outage
            county_outages = county_outages.reset_index(level="CountyFIPS").reindex(
                index=hourly_index_by_year[year], fill_value=0
            )
            county_outages.index.name = "RecordDateTime"

            # integrate OutageFraction over time (x axis in hours)
            hours = hours_axis_by_year[year]
            time_span = hours[-1] - hours[0]
            outage_integral = simpson(county_outages.OutageFraction.to_numpy(), x=hours)

            integrals.append((year, county_code, outage_integral, time_span))

    df = pd.DataFrame(integrals, columns=["year", "county", "outage_integral", "total_duration_h"])
    df.to_csv(outage_integrals_path, index=False)

In [116]:
# Draw one county-level outage map per year and save it twice: a full-US view and an eastern-US crop.
plot_dir = "plots/outage_maps_by_year"
os.makedirs(plot_dir, exist_ok=True)
os.makedirs(os.path.join(plot_dir, "US"), exist_ok=True)
os.makedirs(os.path.join(plot_dir, "eastern_US"), exist_ok=True)

for year in years:

    # keep only this year's rows of the integral table
    outages = df[df.year == year].copy()
    # outage integral divided by the length of the integration window in hours
    outages["outage_norm"] = outages.outage_integral / outages.total_duration_h
    # bring in geometry; counties without an outage row get NaN
    joined = county_boundaries.loc[:, ["GEOID", "geometry"]].set_index("GEOID").join(outages.set_index("county"))

    # a logarithmic colour scale cannot represent zero, so only strictly positive values
    # are drawn (the comparison also drops NaN rows)
    positive = joined[joined.outage_norm > 0]

    f, ax = plt.subplots(figsize=(18, 10))

    cmap = "PuBu_r"

    # fixed log scale so that colours are comparable between years
    norm = colors.LogNorm(vmin=1E-6, vmax=0.1)
    mappable = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    f.colorbar(mappable, cax=cax, label="Outage magnitude")

    # pass the same norm to the map; otherwise the polygons are coloured on a linear,
    # auto-scaled range that does not match the colourbar
    positive.plot("outage_norm", ax=ax, cmap=cmap, norm=norm)

    ax.set_title(f"Electricity outages April - October {year}")
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")

    ax.grid(alpha=0.2)
    ax.set_frame_on(False)
    ax.set_aspect("equal")

    # full-US view
    ax.set_xlim(-128, -63)
    ax.set_ylim(22, 51)
    f.savefig(os.path.join(plot_dir, "US", f"{year}.png"))

    # eastern-US crop of the same figure
    ax.set_xlim(-101, -66)
    ax.set_ylim(23, 49)
    f.savefig(os.path.join(plot_dir, "eastern_US", f"{year}.png"))

    # release the figure so they do not accumulate across loop iterations
    plt.close(f)