In [1]:
import functools
import json
from urllib.request import urlopen

import matplotlib.pyplot as plt
import plotly.express as px
import polars as pl
import seaborn as sns

from nwec.constants import CLEAN_UTILITY_DATA

In [2]:
# Stack every utility's cleaned arrearage file into one frame and persist it so the
# cells below can load a single combined file.
utilities = ["avista", "cng", "nwng", "pac", "pse"]

# Read each file exactly once and concatenate in a single call. This avoids
# re-reading the first file just to obtain an empty seed frame and avoids building
# a fresh intermediate frame on every reduction step.
combined_arrearages = pl.concat(
    [pl.read_ipc(CLEAN_UTILITY_DATA / f"{utility}.arrow") for utility in utilities]
)
combined_arrearages.write_ipc(CLEAN_UTILITY_DATA / "combined_arrearages.arrow")

In [3]:
# Load the combined arrearage file from disk; every cell below works from `df`.
df = pl.read_ipc(source=CLEAN_UTILITY_DATA / "combined_arrearages.arrow")

In [None]:
# Keep only the residential customer class for this chart.
data = df.filter(pl.col("Customer Class") == "Residential")

# Grouped bars: one cluster per month, one colour per utility, error bars disabled.
# NOTE(review): seaborn's barplot aggregates rows sharing the same x/hue with its
# default estimator (the mean), so bar heights are averages, not totals - confirm
# that is the intended reading of "Arrearage Amount".
plt.figure(figsize=(10, 6))
sns.barplot(data=data, x="Month", y="Amount", hue="Utility", errorbar=None).set(
    title="Arrearage Amounts by Month",
    xlabel="Month",
    ylabel="Arrearage Amount",
)
plt.xticks(rotation=45)  # slant the month labels

# Render the finished figure.
plt.show()

In [None]:
# Total Amount for each (Zip Code, Vintage, Month) combination; shown as the cell output.
df.group_by(["Zip Code", "Vintage", "Month"]).agg(pl.col("Amount").sum())

In [None]:
# Residential rows, totalled per (Zip Code, Vintage, Month).
# maintain_order=True keeps groups in first-appearance order. Without it polars
# does not guarantee the output row order, so the order of the month bars could
# change from one run to the next.
data = (
    df.filter(pl.col("Customer Class") == "Residential")
    .group_by("Zip Code", "Vintage", "Month", maintain_order=True)
    .agg(pl.sum("Amount"))
)

# Grouped bars: one cluster per month, one colour per vintage, error bars disabled.
# NOTE(review): seaborn's default estimator is the mean, so each bar is the average
# of the per-zip-code totals for that month/vintage - confirm that is intended.
plt.figure(figsize=(10, 6))
sns.barplot(x="Month", y="Amount", hue="Vintage", data=data, errorbar=None)

# Titles and axis labels.
plt.title("Arrearage Amounts by Month")
plt.xlabel("Month")
plt.ylabel("Arrearage Amount")
plt.xticks(rotation=45)  # rotate month labels for better readability

# Render the finished figure.
plt.show()

In [5]:
# Download the Washington ZIP-code boundary GeoJSON from the
# OpenDataDE/State-zip-code-GeoJSON repository on GitHub and parse it into a dict.
# The timeout (seconds) makes a stalled connection raise instead of blocking the
# kernel indefinitely.
with urlopen(
    "https://raw.githubusercontent.com/OpenDataDE/State-zip-code-GeoJSON/refs/heads/master/wa_washington_zip_codes_geo.min.json",
    timeout=30,
) as response:
    zip_codes = json.load(response)

In [None]:
# Rows to map: residential customers of utility "PSE" whose Vintage equals 90.
data = df.filter(
    (pl.col("Customer Class") == "Residential")
    & (pl.col("Utility") == "PSE")
    & (pl.col("Vintage") == 90)
)
# Convert to pandas before handing the frame to Plotly Express.
data_pd = data.to_pandas()

def _iter_positions(coords):
    """Yield every (lon, lat) pair nested anywhere inside a GeoJSON ``coordinates`` array.

    A position is recognised as a sequence whose first element is a number. Any
    other sequence is treated as a container and searched recursively, so Polygon
    and MultiPolygon nesting depths are both handled.
    """
    if not isinstance(coords, (list, tuple)) or not coords:
        return
    first = coords[0]
    # bool is a subclass of int; exclude it so stray flags are never read as longitudes.
    if isinstance(first, (int, float)) and not isinstance(first, bool):
        if len(coords) >= 2:
            yield coords[0], coords[1]
        return
    for child in coords:
        yield from _iter_positions(child)


def _geojson_bounds(feature_collection):
    """Return ``(min_lon, min_lat, max_lon, max_lat)`` over all features.

    Features with a missing or null geometry are skipped.

    Raises:
        ValueError: if no coordinates are found at all.
    """
    lons, lats = [], []
    for feature in feature_collection["features"]:
        geometry = feature.get("geometry") or {}
        for lon, lat in _iter_positions(geometry.get("coordinates", [])):
            lons.append(lon)
            lats.append(lat)
    if not lons:
        raise ValueError("GeoJSON contains no coordinates to compute bounds from")
    return min(lons), min(lats), max(lons), max(lats)


# Bounding box of every ZIP-code shape. The previous loop only inspected
# coordinates[0] and required float members, which silently skipped every
# MultiPolygon feature and any integer-valued coordinate.
min_lon, min_lat, max_lon, max_lat = _geojson_bounds(zip_codes)

# Midpoint of the bounding box, used to centre the map view.
center_lon = (min_lon + max_lon) / 2.0
center_lat = (min_lat + max_lat) / 2.0

# Choropleth of Amount per ZIP code. Rows are matched to GeoJSON features through
# the "Zip Code" column and each feature's properties.ZCTA5CE10 value. The view is
# then centred on, and limited to, the previously computed bounding box.
fig = px.choropleth(
    data_frame=data_pd,
    geojson=zip_codes,
    featureidkey="properties.ZCTA5CE10",
    locations="Zip Code",
    color="Amount",
    color_continuous_scale="Viridis",
    hover_name="Zip Code",
    scope="usa",
).update_geos(
    visible=True,
    center_lon=center_lon,
    center_lat=center_lat,
    lonaxis_range=[min_lon, max_lon],
    lataxis_range=[min_lat, max_lat],
)

fig.show()