In [1]:
import datetime

import polars as pl

import nwec.utility_reporting.arrearages
import nwec.utils.excel
from nwec.constants import DATA

YEAR = 2024
NUM_MONTHS = 3
COLS_PER_MONTH = 8
KLI_COLS_PER_MONTH = 4
spreadsheet = DATA / f"utility_reporting/nwng/nwng_{YEAR}.xlsx"

In [2]:
arrearages_index = nwec.utils.excel.get_sheet_index_from_name(spreadsheet, "Past Due Balances")
df = pl.read_excel(spreadsheet, sheet_id=arrearages_index, has_header=False)

# Arrearages

In [3]:
_, arrearage_start_index = nwec.utils.excel.find_unpromoted_header(df, "Past-due balances by customer class")

# Set up arrearages-specific DF with built-in space for the zip code and customer class columns
arrearages = df.select(df.columns[arrearage_start_index : arrearage_start_index + NUM_MONTHS * COLS_PER_MONTH + 2])
arrearages = nwec.utility_reporting.arrearages.normalize_zip_class_cols(df, arrearages)

In [4]:
zip_index = nwec.utils.excel.find_unpromoted_header(arrearages, "Zip Code")
new_columns = arrearages.select(arrearages.columns[2:][:-2]).slice(zip_index[0] - 1, 1).to_dicts()[0]
vintage_cols = arrearages.select(arrearages.columns[2:][:-2]).slice(zip_index[0], 1).to_dicts()[0]
months = list({k: v for k, v in new_columns.items() if v is not None}.values())

for counter, col in enumerate(vintage_cols):
    current_month = months[counter // COLS_PER_MONTH]
    date = datetime.datetime.strptime(current_month, "%b %Y").astimezone(datetime.UTC)
    new_columns[col] = date.strftime("%B %Y")
    new_columns[col] = new_columns[col] + " " + vintage_cols[col]
new_columns = new_columns | {"Zip Code": "Zip Code", "Customer Class": "Customer Class"}

In [5]:
arrearages = arrearages.rename(new_columns).select(list(new_columns.values()))
arrearages = arrearages.filter(~pl.all_horizontal(pl.all().is_null()))
arrearages = (
    arrearages.filter(pl.col("Customer Class").str.contains(r"(?i)resident"))
    .drop(pl.col("Customer Class"))
    .drop(pl.selectors.matches(r"(?i)count"))
)
arrearages = arrearages.with_columns([pl.col(col).cast(pl.Float64, strict=False) for col in arrearages.columns[1:]])

In [6]:
arrearages.sum()

April 2024 30 Days Amt,April 2024 60 Days Amt,April 2024 90 Days + Amt,April 2024 Total Arrearages,May 2024 30 Days Amt,May 2024 60 Days Amt,May 2024 90 Days + Amt,May 2024 Total Arrearages,June 2024 30 Days Amt,June 2024 60 Days Amt,June 2024 90 Days + Amt,June 2024 Total Arrearages,Zip Code
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
,530589.64,262109.24,2093500.0,958601.13,538972.22,315149.72,1812700.0,755043.46,469782.58,390257.18,1615100.0,4234744.0


# KLI Arrearages

In [7]:
_, kli_arrearage_start_index = nwec.utils.excel.find_unpromoted_header(df, "known low-income")
_, kli_arrearage_start_index = nwec.utils.excel.find_unpromoted_header(df, "known low-income")

# Set up arrearages-specific DF with built-in space for the zip code and customer class columns
kli_arrearages = df.select(
    df.columns[kli_arrearage_start_index : kli_arrearage_start_index + NUM_MONTHS * KLI_COLS_PER_MONTH + 2]
)
kli_arrearages = nwec.utility_reporting.arrearages.normalize_zip_class_cols(df, kli_arrearages)

In [8]:
zip_index = nwec.utils.excel.find_unpromoted_header(kli_arrearages, "Zip Code")
new_columns = kli_arrearages.select(kli_arrearages.columns[2:][:-2]).slice(zip_index[0] - 1, 1).to_dicts()[0]
vintage_cols = kli_arrearages.select(kli_arrearages.columns[2:][:-2]).slice(zip_index[0], 1).to_dicts()[0]
months = list({k: v for k, v in new_columns.items() if v is not None}.values())
for counter, col in enumerate(vintage_cols):
    current_month = months[counter // KLI_COLS_PER_MONTH]
    date = datetime.datetime.strptime(current_month, "%b %Y").astimezone(datetime.UTC)
    new_columns[col] = date.strftime("%B %Y")
    new_columns[col] = new_columns[col] + " " + vintage_cols[col]
new_columns = new_columns | {"Zip Code": "Zip Code", "Customer Class": "Customer Class"}

In [9]:
kli_arrearages = kli_arrearages.rename(new_columns).select(list(new_columns.values()))
kli_arrearages = kli_arrearages.filter(~pl.all_horizontal(pl.all().is_null()))
kli_arrearages = (
    kli_arrearages.filter(pl.col("Customer Class").str.contains(r"(?i)resident"))
    .drop(pl.col("Customer Class"))
    .drop(pl.selectors.matches(r"(?i)count"))
)
kli_arrearages = kli_arrearages.with_columns(
    [pl.col(col).cast(pl.Float64, strict=False) for col in kli_arrearages.columns[1:]]
)

In [10]:
kli_arrearages.sum()

April 2024 30 Days,April 2024 60 Days,April 2024 90 Days +,April 2024 Total Arrearages,May 2024 30 Days,May 2024 60 Days,May 2024 90 Days +,May 2024 Total Arrearages,June 2024 30 Days,June 2024 60 Days,June 2024 90 Days +,June 2024 Total Arrearages,Zip Code
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
,14732.93,21041.81,52507.26,5867.38,5554.94,10261.1,21683.42,2729.54,2837.67,5641.03,11208.24,4234744.0
