In [1]:
import polars as pl

import nwec.utility_reporting.arrearages
import nwec.utils.excel
from nwec.constants import CLEAN_UTILITY_DATA, RAW_UTILITY_DATA

# Reporting year; it is interpolated into the workbook path built below.
YEAR = 2024
# Number of monthly blocks handed to the arrearage helpers (the summaries
# rendered in this notebook list months 01 through 06).
NUM_MONTHS = 6
# Per-month column count passed to the arrearage helpers.
# NOTE(review): presumably mirrors the width of one month's block in the
# workbook -- confirm against the source sheet if the layout changes.
COLS_PER_MONTH = 9
# Text used to look up the worksheet index from the sheet name.
SHEET_SEARCH_STRING = "Past Due Balances"
# Search strings given to get_arrearages_df to pull, respectively, the overall
# table and the known low-income (KLI) table out of the same sheet.
# NOTE(review): how the helper matches these (case, substring) is not visible here.
ARREARAGE_SEARCH_STRING = "number of past-due balances"
KLI_SEARCH_STRING = "known low-income"
# Input workbook for YEAR, located under the raw utility data directory.
spreadsheet = RAW_UTILITY_DATA / f"cng/cng_{YEAR}.xlsx"

In [2]:
# Resolve the worksheet by name, then read it with has_header=False so the
# first spreadsheet row is kept as data rather than used for column names.
sheet_index = nwec.utils.excel.get_sheet_index_from_name(
    spreadsheet,
    SHEET_SEARCH_STRING,
)
df = pl.read_excel(
    spreadsheet,
    sheet_id=sheet_index,
    has_header=False,
)

# Extract one frame per search string from the same raw sheet, in order:
# the overall arrearage table first, the known low-income (KLI) table second.
arrearages, kli_arrearages = (
    nwec.utility_reporting.arrearages.get_arrearages_df(df, NUM_MONTHS, COLS_PER_MONTH, search_string)
    for search_string in (ARREARAGE_SEARCH_STRING, KLI_SEARCH_STRING)
)

# Arrearages

In [3]:
# Layout hints forwarded to the combine helper.
# NOTE(review): source_date_format looks like the strptime pattern of the date
# cells in the raw sheet, and date_to_zip_offset like the column distance from
# a date cell to its zip-code column -- confirm against the helper.
source_date_format = "%Y-%m-%d %H:%M:%S"
date_to_zip_offset = 1

# This rebinds `arrearages`; re-running the cell without first re-running the
# extraction cell would feed already-combined data back into the helper.
arrearages = nwec.utility_reporting.arrearages.combine_arrearage_year_vintage_cols(
    arrearages,
    NUM_MONTHS,
    COLS_PER_MONTH,
    date_to_zip_offset,
    source_date_format,
)

In [4]:
# Column-wise totals of the combined frame, displayed as a quick sanity check.
# The "Zip Code" total is only a by-product of summing every column.
arrearages.sum()

Zip Code,01 2024 30 Days Arrears,01 2024 30 Days Cust,01 2024 60 Days Arrears,01 2024 60 Days Cust,01 2024 90 Days Arrears,01 2024 90 Days Cust,01 2024 Total Arrearages,02 2024 30 Days Arrears,02 2024 30 Days Cust,02 2024 60 Days Arrears,02 2024 60 Days Cust,02 2024 90 Days Arrears,02 2024 90 Days Cust,02 2024 Total Arrearages,03 2024 30 Days Arrears,03 2024 30 Days Cust,03 2024 60 Days Arrears,03 2024 60 Days Cust,03 2024 90 Days Arrears,03 2024 90 Days Cust,03 2024 Total Arrearages,04 2024 30 Days Arrears,04 2024 30 Days Cust,04 2024 60 Days Arrears,04 2024 60 Days Cust,04 2024 90 Days Arrears,04 2024 90 Days Cust,04 2024 Total Arrearages,05 2024 30 Days Arrears,05 2024 30 Days Cust,05 2024 60 Days Arrears,05 2024 60 Days Cust,05 2024 90 Days Arrears,05 2024 90 Days Cust,05 2024 Total Arrearages,06 2024 30 Days Arrears,06 2024 30 Days Cust,06 2024 60 Days Arrears,06 2024 60 Days Cust,06 2024 90 Days Arrears,06 2024 90 Days Cust,06 2024 Total Arrearages
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
7397525,1564300.0,8994.0,477846.7,3231.0,495500.19,3958.0,2537700.0,1750700.0,8346.0,594804.61,3400.0,497099.93,3250.0,2842600.0,2040200.0,9644.0,1089400.0,5219.0,655486.48,3948.0,3785031.3,1601600.0,7566.0,954606.38,4208.0,953093.16,4652.0,3509300.0,1161100.0,7902.0,823555.85,3754.0,1114000.0,5184.0,3098700.0,981580.56,8791.0,709077.18,4416.0,1337100.0,6235.0,3027700.0


# Known Low-Income (KLI) Arrearages

In [5]:
# Same layout hints as used for the overall table, set again here so this
# cell carries its own inputs.
date_to_zip_offset, source_date_format = 1, "%Y-%m-%d %H:%M:%S"

# Rebinds `kli_arrearages` to the combined result; re-run the extraction cell
# before re-running this one so the helper receives the raw KLI frame.
kli_arrearages = nwec.utility_reporting.arrearages.combine_arrearage_year_vintage_cols(
    kli_arrearages,
    NUM_MONTHS,
    COLS_PER_MONTH,
    date_to_zip_offset,
    source_date_format,
)

In [6]:
# Column-wise totals of the combined KLI frame, displayed as a quick sanity
# check. The "Zip Code" total is only a by-product of summing every column.
kli_arrearages.sum()

Zip Code,01 2024 30 Days Arrears,01 2024 30 Days Cust,01 2024 60 Days Arrears,01 2024 60 Days Cust,01 2024 90 Days Arrears,01 2024 90 Days Cust,01 2024 Total Arrearages,02 2024 30 Days Arrears,02 2024 30 Days Cust,02 2024 60 Days Arrears,02 2024 60 Days Cust,02 2024 90 Days Arrears,02 2024 90 Days Cust,02 2024 Total Arrearages,03 2024 30 Days Arrears,03 2024 30 Days Cust,03 2024 60 Days Arrears,03 2024 60 Days Cust,03 2024 90 Days Arrears,03 2024 90 Days Cust,03 2024 Total Arrearages,04 2024 30 Days Arrears,04 2024 30 Days Cust,04 2024 60 Days Arrears,04 2024 60 Days Cust,04 2024 90 Days Arrears,04 2024 90 Days Cust,04 2024 Total Arrearages,05 2024 30 Days Arrears,05 2024 30 Days Cust,05 2024 60 Days Arrears,05 2024 60 Days Cust,05 2024 90 Days Arrears,05 2024 90 Days Cust,05 2024 Total Arrearages,06 2024 30 Days Arrears,06 2024 30 Days Cust,06 2024 60 Days Arrears,06 2024 60 Days Cust,06 2024 90 Days Arrears,06 2024 90 Days Cust,06 2024 Total Arrearages
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
5326308,37980.28,212.0,14421.73,76.0,24308.45,137.0,76710.46,42867.52,193.0,24452.65,124.0,25340.89,137.0,92661.06,39462.79,162.0,25831.87,111.0,27596.23,126.0,92890.89,30089.83,148.0,24131.32,103.0,32790.1,139.0,87011.25,19222.34,96.0,18408.14,94.0,34336.19,145.0,71966.67,15416.11,100.0,12470.58,63.0,29702.61,145.0,57589.3


# Save Output

In [7]:
# Normalise each frame and tag it with its utility and customer class. The
# (frame, label) pairs are evaluated before the names are rebound, so each
# helper call receives the frame produced by the earlier combine cells.
arrearages, kli_arrearages = (
    nwec.utility_reporting.arrearages.normalize_arrearage_cols(frame, NUM_MONTHS).with_columns(
        pl.lit("CNG").alias("Utility"),
        pl.lit(customer_class).alias("Customer Class"),
    )
    for frame, customer_class in (
        (arrearages, "Residential"),
        (kli_arrearages, "KLI"),
    )
)

# Stack both customer classes and write them out as a single Arrow IPC file.
combined = pl.concat([arrearages, kli_arrearages])
combined.write_ipc(CLEAN_UTILITY_DATA / "cng.arrow")