In [1]:
import datetime

import polars as pl

import nwec.utility_reporting.arrearages
import nwec.utils.excel
from nwec.constants import DATA

# Reporting year; it is interpolated into the workbook filename below.
YEAR = 2024
# Number of monthly column groups sliced out of the sheet.
NUM_MONTHS = 6
# Number of spreadsheet columns that make up one monthly group.
COLS_PER_MONTH = 9
# Path of the Excel workbook that the cells below read.
spreadsheet = DATA / f"utility_reporting/cng/cng_{YEAR}.xlsx"

# Arrearages

In [2]:
# Resolve the "Past Due Balances" worksheet by name, then load it raw: no row is promoted
# to a header, so the header rows stay in the data for the later cells to locate.
arrearages_index = nwec.utils.excel.get_sheet_index_from_name(
    spreadsheet,
    "Past Due Balances",
)
df = pl.read_excel(
    spreadsheet,
    has_header=False,
    sheet_id=arrearages_index,
)

In [3]:
# Search the raw sheet for the "number of past-due balances" header. Only the second element
# of the result is kept; it is used below as a position into df.columns.
_, arrearage_start_index = nwec.utils.excel.find_unpromoted_header(
    df,
    "number of past-due balances",
)

# Arrearages-specific frame: NUM_MONTHS groups of COLS_PER_MONTH columns, plus two extra
# columns of built-in space for the zip code and customer class columns. The slice is then
# passed, together with the full sheet, to the zip/class normalizer.
arrearages = nwec.utility_reporting.arrearages.normalize_zip_class_cols(
    df,
    df.select(
        df.columns[arrearage_start_index : arrearage_start_index + NUM_MONTHS * COLS_PER_MONTH + 2]
    ),
)

In [4]:
# Locate the "Zip Code" header. Element 0 of the result is used as the row position of the
# per-column labels; the row directly above it is read as the month row.
zip_index = nwec.utils.excel.find_unpromoted_header(arrearages, "Zip Code")

# Both label rows are read from the same column subset (first two and last two columns
# excluded), so select it once.
header_cells = arrearages.select(arrearages.columns[2:][:-2])
new_columns = header_cells.slice(zip_index[0] - 1, 1).to_dicts()[0]
vintage_cols = header_cells.slice(zip_index[0], 1).to_dicts()[0]

# Keep only the populated cells of the month row, in column order. Presumably each month is
# written once per group of COLS_PER_MONTH columns -- verify against the workbook.
months = [value for value in new_columns.values() if value is not None]

# Build the rename mapping: existing column name -> "<Month> <Year> <label>".
for counter, col in enumerate(vintage_cols):
    current_month = months[counter // COLS_PER_MONTH]
    # The cell text is a plain wall-clock timestamp. Attach UTC with replace() instead of
    # astimezone(): astimezone() on a naive datetime assumes the machine's local zone and
    # converts, which can move a timestamp near a month boundary (e.g. midnight on the 1st)
    # into the previous month on hosts east of UTC and mislabel the column.
    date = datetime.datetime.strptime(current_month, "%Y-%m-%d %H:%M:%S").replace(tzinfo=datetime.UTC)
    new_columns[col] = date.strftime("%B %Y") + " " + vintage_cols[col]

# Identity entries so the zip and class columns survive the select-by-mapping in the next cell.
new_columns = new_columns | {"Zip Code": "Zip Code", "Customer Class": "Customer Class"}

In [5]:
# Apply the generated names and keep exactly the mapped columns, in mapping order.
arrearages = arrearages.rename(new_columns).select(list(new_columns.values()))

# Drop rows in which every column is null, keep rows whose customer class contains
# "resident" (case-insensitive), then discard the class column.
arrearages = (
    arrearages.filter(~pl.all_horizontal(pl.all().is_null()))
    .filter(pl.col("Customer Class").str.contains(r"(?i)resident"))
    .drop(pl.col("Customer Class"))
)

# Rebuild the frame with "Zip Code" first, followed by every column whose name matches
# neither "zip" nor "customer class".
arrearages = pl.concat(
    [
        arrearages.select("Zip Code"),
        arrearages.drop(pl.selectors.matches(r"(?i)zip|customer class")),
    ],
    how="horizontal",
)

# Cast everything after the leading zip column to Float64; the cast is non-strict, so
# values that cannot be converted do not raise.
arrearages = arrearages.with_columns(
    [pl.col(name).cast(pl.Float64, strict=False) for name in arrearages.columns[1:]]
)

In [6]:
# Display per-column totals of the cleaned frame (the string "Zip Code" column has no total).
arrearages.sum()

Zip Code,January 2024 30 Days Arrears,January 2024 30 Days Cust,January 2024 60 Days Arrears,January 2024 60 Days Cust,January 2024 90 Days Arrears,January 2024 90 Days Cust,January 2024 Total Arrearages,February 2024 30 Days Arrears,February 2024 30 Days Cust,February 2024 60 Days Arrears,February 2024 60 Days Cust,February 2024 90 Days Arrears,February 2024 90 Days Cust,February 2024 Total Arrearages,March 2024 30 Days Arrears,March 2024 30 Days Cust,March 2024 60 Days Arrears,March 2024 60 Days Cust,March 2024 90 Days Arrears,March 2024 90 Days Cust,March 2024 Total Arrearages,April 2024 30 Days Arrears,April 2024 30 Days Cust,April 2024 60 Days Arrears,April 2024 60 Days Cust,April 2024 90 Days Arrears,April 2024 90 Days Cust,April 2024 Total Arrearages,May 2024 30 Days Arrears,May 2024 30 Days Cust,May 2024 60 Days Arrears,May 2024 60 Days Cust,May 2024 90 Days Arrears,May 2024 90 Days Cust,May 2024 Total Arrearages,June 2024 30 Days Arrears,June 2024 30 Days Cust,June 2024 60 Days Arrears,June 2024 60 Days Cust,June 2024 90 Days Arrears,June 2024 90 Days Cust,June 2024 Total Arrearages
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
,1564300.0,8994.0,477846.7,3231.0,495500.19,3958.0,2537700.0,1750700.0,8346.0,594804.61,3400.0,497099.93,3250.0,2842600.0,2040200.0,9644.0,1089400.0,5219.0,655486.48,3948.0,3785031.3,1601600.0,7566.0,954606.38,4208.0,953093.16,4652.0,3509300.0,1161100.0,7902.0,823555.85,3754.0,1114000.0,5184.0,3098700.0,981580.56,8791.0,709077.18,4416.0,1337100.0,6235.0,3027700.0


# KLI Arrearages
Almost the same as arrearages, but with a different search string. Also, they swapped the order of the customer class and zip code columns for whatever reason.

In [7]:
# Resolve the "Past Due Balances" worksheet by name and load it raw (no header promotion),
# rebinding df for the known-low-income section.
kli_arrearages_index = nwec.utils.excel.get_sheet_index_from_name(
    spreadsheet,
    "Past Due Balances",
)
df = pl.read_excel(
    spreadsheet,
    has_header=False,
    sheet_id=kli_arrearages_index,
)

In [8]:
# Search the raw sheet for the "known low-income" header. Only the second element of the
# result is kept; it is used below as a position into df.columns.
_, kli_arrearage_start_index = nwec.utils.excel.find_unpromoted_header(
    df,
    "known low-income",
)

# KLI-specific frame: NUM_MONTHS groups of COLS_PER_MONTH columns, plus two extra columns
# of built-in space for the zip code and customer class columns. The slice is then passed,
# together with the full sheet, to the zip/class normalizer.
kli_arrearages = nwec.utility_reporting.arrearages.normalize_zip_class_cols(
    df,
    df.select(
        df.columns[kli_arrearage_start_index : kli_arrearage_start_index + NUM_MONTHS * COLS_PER_MONTH + 2]
    ),
)

In [9]:
# Locate the "Zip Code" header. Element 0 of the result is used as the row position of the
# per-column labels; the row directly above it is read as the month row.
zip_index = nwec.utils.excel.find_unpromoted_header(kli_arrearages, "Zip Code")

# Both label rows are read from the same column subset (first two and last two columns
# excluded), so select it once.
header_cells = kli_arrearages.select(kli_arrearages.columns[2:][:-2])
new_columns = header_cells.slice(zip_index[0] - 1, 1).to_dicts()[0]
vintage_cols = header_cells.slice(zip_index[0], 1).to_dicts()[0]

# Keep only the populated cells of the month row, in column order. Presumably each month is
# written once per group of COLS_PER_MONTH columns -- verify against the workbook.
months = [value for value in new_columns.values() if value is not None]

# Build the rename mapping: existing column name -> "<Month> <Year> <label>".
for counter, col in enumerate(vintage_cols):
    current_month = months[counter // COLS_PER_MONTH]
    # The cell text is a plain wall-clock timestamp. Attach UTC with replace() instead of
    # astimezone(): astimezone() on a naive datetime assumes the machine's local zone and
    # converts, which can move a timestamp near a month boundary (e.g. midnight on the 1st)
    # into the previous month on hosts east of UTC and mislabel the column.
    date = datetime.datetime.strptime(current_month, "%Y-%m-%d %H:%M:%S").replace(tzinfo=datetime.UTC)
    new_columns[col] = date.strftime("%B %Y") + " " + vintage_cols[col]

# Identity entries so the zip and class columns survive the select-by-mapping in the next cell.
new_columns = new_columns | {"Zip Code": "Zip Code", "Customer Class": "Customer Class"}

In [10]:
# Apply the generated names and keep exactly the mapped columns, in mapping order.
kli_arrearages = kli_arrearages.rename(new_columns).select(list(new_columns.values()))

# Drop rows in which every column is null, keep rows whose customer class contains
# "resident" (case-insensitive), then discard the class column.
kli_arrearages = (
    kli_arrearages.filter(~pl.all_horizontal(pl.all().is_null()))
    .filter(pl.col("Customer Class").str.contains(r"(?i)resident"))
    .drop(pl.col("Customer Class"))
)

# Rebuild the frame with "Zip Code" first, followed by every column whose name matches
# neither "zip" nor "customer class".
kli_arrearages = pl.concat(
    [
        kli_arrearages.select("Zip Code"),
        kli_arrearages.drop(pl.selectors.matches(r"(?i)zip|customer class")),
    ],
    how="horizontal",
)

# Cast everything after the leading zip column to Float64; the cast is non-strict, so
# values that cannot be converted do not raise.
kli_arrearages = kli_arrearages.with_columns(
    [pl.col(name).cast(pl.Float64, strict=False) for name in kli_arrearages.columns[1:]]
)

In [11]:
# Display per-column totals of the cleaned KLI frame (the string "Zip Code" column has no total).
kli_arrearages.sum()

Zip Code,January 2024 30 Days Arrears,January 2024 30 Days Cust,January 2024 60 Days Arrears,January 2024 60 Days Cust,January 2024 90 Days Arrears,January 2024 90 Days Cust,January 2024 Total Arrearages,February 2024 30 Days Arrears,February 2024 30 Days Cust,February 2024 60 Days Arrears,February 2024 60 Days Cust,February 2024 90 Days Arrears,February 2024 90 Days Cust,February 2024 Total Arrearages,March 2024 30 Days Arrears,March 2024 30 Days Cust,March 2024 60 Days Arrears,March 2024 60 Days Cust,March 2024 90 Days Arrears,March 2024 90 Days Cust,March 2024 Total Arrearages,April 2024 30 Days Arrears,April 2024 30 Days Cust,April 2024 60 Days Arrears,April 2024 60 Days Cust,April 2024 90 Days Arrears,April 2024 90 Days Cust,April 2024 Total Arrearages,May 2024 30 Days Arrears,May 2024 30 Days Cust,May 2024 60 Days Arrears,May 2024 60 Days Cust,May 2024 90 Days Arrears,May 2024 90 Days Cust,May 2024 Total Arrearages,June 2024 30 Days Arrears,June 2024 30 Days Cust,June 2024 60 Days Arrears,June 2024 60 Days Cust,June 2024 90 Days Arrears,June 2024 90 Days Cust,June 2024 Total Arrearages
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
,37980.28,212.0,14421.73,76.0,24308.45,137.0,76710.46,42867.52,193.0,24452.65,124.0,25340.89,137.0,92661.06,39462.79,162.0,25831.87,111.0,27596.23,126.0,92890.89,30089.83,148.0,24131.32,103.0,32790.1,139.0,87011.25,19222.34,96.0,18408.14,94.0,34336.19,145.0,71966.67,15416.11,100.0,12470.58,63.0,29702.61,145.0,57589.3
