In [1]:
import datetime

import polars as pl

import nwec.utility_reporting.arrearages
import nwec.utils.excel
from nwec.constants import DATA

YEAR = 2024
NUM_MONTHS = 3
COLS_PER_MONTH = 4
spreadsheet = DATA / f"utility_reporting/pse/pse_{YEAR}.xlsx"

In [2]:
sheet_index = nwec.utils.excel.get_sheet_index_from_name(spreadsheet, "Past Due Balances")
df = pl.read_excel(spreadsheet, sheet_id=sheet_index, has_header=False)

# Arrearages

In [3]:
_, arrearage_start_index = nwec.utils.excel.find_unpromoted_header(
    df, "Past-due balances by customer class and number of days"
)

# Set up arrearages-specific DF with built-in space for the zip code and customer class columns
arrearages = df.select(df.columns[arrearage_start_index : arrearage_start_index + NUM_MONTHS * COLS_PER_MONTH + 2])
arrearages = nwec.utility_reporting.arrearages.normalize_zip_class_cols(df, arrearages)

In [4]:
zip_index = nwec.utils.excel.find_unpromoted_header(arrearages, "Zip Code")
new_columns = arrearages.select(arrearages.columns[2:]).slice(zip_index[0], 1).to_dicts()[0]
vintage_cols = arrearages.select(arrearages.columns[2:]).slice(zip_index[0] + 1, 1).to_dicts()[0]
months = list({k: v for k, v in new_columns.items() if v is not None}.values())
for counter, col in enumerate(vintage_cols):
    current_month = months[counter // COLS_PER_MONTH]
    date = datetime.datetime.strptime(current_month, "%Y-%m-%d %H:%M:%S").astimezone(datetime.UTC)
    new_columns[col] = date.strftime("%B %Y")
    new_columns[col] = new_columns[col] + " " + vintage_cols[col]
new_columns = new_columns | {"Zip Code": "Zip Code", "Customer Class": "Customer Class"}

In [5]:
arrearages = arrearages.rename(new_columns).select(list(new_columns.values()))
arrearages = arrearages.filter(~pl.all_horizontal(pl.all().is_null()))
arrearages = arrearages.filter(pl.col("Customer Class").str.contains(r"(?i)residential")).drop(pl.col("Customer Class"))
arrearages = arrearages.with_columns([pl.col(col).cast(pl.Float64, strict=False) for col in arrearages.columns[1:]])

In [6]:
arrearages.sum()

April 2024 31 - 60 Days,April 2024 61 - 90 Days,April 2024 91+ Days,April 2024 Total Arrearages,May 2024 31 - 60 Days,May 2024 61 - 90 Days,May 2024 91+ Days,May 2024 Total Arrearages,June 2024 31 - 60 Days,June 2024 61 - 90 Days,June 2024 91+ Days,June 2024 Total Arrearages,Zip Code
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
,18843000.0,66271000.0,142120000.0,21600000.0,15369000.0,71822000.0,136570000.0,17212000.0,16436000.0,75010000.0,134350000.0,22404931.0


# KLI Arrearages

In [7]:
_, kli_arrearage_start_index = nwec.utils.excel.find_unpromoted_header(df, "past-due balances for known low-income")

# Set up arrearages-specific DF with built-in space for the zip code and customer class columns
kli_arrearages = df.select(
    df.columns[kli_arrearage_start_index : kli_arrearage_start_index + NUM_MONTHS * COLS_PER_MONTH + 2]
)
kli_arrearages = nwec.utility_reporting.arrearages.normalize_zip_class_cols(df, kli_arrearages)

In [8]:
zip_index = nwec.utils.excel.find_unpromoted_header(kli_arrearages, "Zip Code")
new_columns = kli_arrearages.select(kli_arrearages.columns[2:]).slice(zip_index[0], 1).to_dicts()[0]
vintage_cols = kli_arrearages.select(kli_arrearages.columns[2:]).slice(zip_index[0] + 1, 1).to_dicts()[0]
months = list({k: v for k, v in new_columns.items() if v is not None}.values())
for counter, col in enumerate(vintage_cols):
    current_month = months[counter // COLS_PER_MONTH]
    date = datetime.datetime.strptime(current_month, "%Y-%m-%d %H:%M:%S").astimezone(datetime.UTC)
    new_columns[col] = date.strftime("%B %Y")
    new_columns[col] = new_columns[col] + " " + vintage_cols[col]
new_columns = new_columns | {"Zip Code": "Zip Code", "Customer Class": "Customer Class"}

In [9]:
kli_arrearages = kli_arrearages.rename(new_columns).select(list(new_columns.values()))
kli_arrearages = kli_arrearages.filter(~pl.all_horizontal(pl.all().is_null()))
kli_arrearages = kli_arrearages.filter(pl.col("Customer Class").str.contains(r"(?i)residential")).drop(
    pl.col("Customer Class")
)
kli_arrearages = kli_arrearages.with_columns(
    [pl.col(col).cast(pl.Float64, strict=False) for col in kli_arrearages.columns[1:]]
)

In [10]:
kli_arrearages.sum()

April 2024 31 - 60 Days,April 2024 61 - 90 Days,April 2024 91+ Days,April 2024 Total Arrearages,May 2024 31 - 60 Days,May 2024 61 - 90 Days,May 2024 91+ Days,May 2024 Total Arrearages,June 2024 31 - 60 Days,June 2024 61 - 90 Days,June 2024 91+ Days,June 2024 Total Arrearages,Zip Code
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
,4149800.0,13362000.0,26811000.0,3874000.0,3110000.0,13463000.0,24092000.0,2884900.0,3175500.0,13603000.0,22751000.0,21913360.0
