In [1]:
import polars as pl

import nwec.utility_reporting.arrearages
import nwec.utils.excel
from nwec.constants import CLEAN_UTILITY_DATA, RAW_UTILITY_DATA

# Reporting year; selects which PAC workbook is loaded (see `spreadsheet`).
YEAR = 2024
# Number of monthly blocks handed to the nwec arrearage helpers.
NUM_MONTHS = 3
# Width of each monthly block. The combined tables shown in this notebook have
# five columns per month: COUNT, DAYS 31 60, DAYS 61 90, DAYS 91+, ARREARS.
COLS_PER_MONTH = 5
# Text passed to get_sheet_index_from_name to pick the worksheet to read.
SHEET_SEARCH_STRING = "Arrears"
# Text passed to get_arrearages_df, one per extracted table. Presumably matched
# against a label in the sheet to locate each table -- TODO confirm in the helper.
ARREARAGE_SEARCH_STRING = "Past-due balances"
KLI_SEARCH_STRING = "known low-income"
# Raw input workbook for YEAR.
spreadsheet = RAW_UTILITY_DATA / f"pac/pac_{YEAR}.xlsx"

In [2]:
# Find the arrears worksheet by name and read it without a header row, so the
# extraction helper receives every cell of the sheet as data.
sheet_index = nwec.utils.excel.get_sheet_index_from_name(
    spreadsheet, SHEET_SEARCH_STRING
)
df = pl.read_excel(spreadsheet, sheet_id=sheet_index, has_header=False)

# Pull both tables out of the same raw sheet: all past-due balances first, then
# the known low-income subset. The generator keeps the loop variable out of the
# notebook namespace.
arrearages, kli_arrearages = (
    nwec.utility_reporting.arrearages.get_arrearages_df(
        df, NUM_MONTHS, COLS_PER_MONTH, search_string
    )
    for search_string in (ARREARAGE_SEARCH_STRING, KLI_SEARCH_STRING)
)

# Arrearages

In [3]:
# Layout and format hints forwarded positionally to the combine helper.
source_date_format = "%Y%m"  # strptime pattern: 4-digit year then 2-digit month
date_to_zip_offset = 1  # presumably the gap between the date and zip cells -- TODO confirm

# NOTE: this rebinds `arrearages`, so re-running the cell feeds the already
# combined frame back into the helper. Re-run the extraction cell first.
arrearages = nwec.utility_reporting.arrearages.combine_arrearage_year_vintage_cols(
    arrearages,
    NUM_MONTHS,
    COLS_PER_MONTH,
    date_to_zip_offset,
    source_date_format,
)

In [4]:
# Column-wise totals as a quick sanity check on the combined table. The
# Zip Code column is summed along with the rest; that figure is not meaningful.
arrearages.sum()

Zip Code,04 2024 COUNT,04 2024 DAYS 31 60,04 2024 DAYS 61 90,04 2024 DAYS 91+,04 2024 ARREARS,05 2024 COUNT,05 2024 DAYS 31 60,05 2024 DAYS 61 90,05 2024 DAYS 91+,05 2024 ARREARS,06 2024 COUNT,06 2024 DAYS 31 60,06 2024 DAYS 61 90,06 2024 DAYS 91+,06 2024 ARREARS
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
3566098,27842.0,3809600.0,2744742.6,6521200.0,13075000.0,26927.0,2924600.0,2158981.1,6542400.0,11626000.0,25707.0,2390300.0,1640400.0,5879600.0,9910300.0


# Known Low-Income (KLI) Arrearages

In [5]:
# Layout and format hints forwarded positionally to the combine helper.
source_date_format = "%Y%m"  # strptime pattern: 4-digit year then 2-digit month
date_to_zip_offset = 1  # presumably the gap between the date and zip cells -- TODO confirm

# NOTE: this rebinds `kli_arrearages`, so re-running the cell feeds the already
# combined frame back into the helper. Re-run the extraction cell first.
kli_arrearages = nwec.utility_reporting.arrearages.combine_arrearage_year_vintage_cols(
    kli_arrearages,
    NUM_MONTHS,
    COLS_PER_MONTH,
    date_to_zip_offset,
    source_date_format,
)

In [6]:
# Column-wise totals as a quick sanity check on the combined KLI table. The
# Zip Code column is summed along with the rest; that figure is not meaningful.
kli_arrearages.sum()

Zip Code,04 2024 COUNT,04 2024 DAYS 31 60,04 2024 DAYS 61 90,04 2024 DAYS 91+,04 2024 ARREARS,05 2024 COUNT,05 2024 DAYS 31 60,05 2024 DAYS 61 90,05 2024 DAYS 91+,05 2024 ARREARS,06 2024 COUNT,06 2024 DAYS 31 60,06 2024 DAYS 61 90,06 2024 DAYS 91+,06 2024 ARREARS
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
3170370,5825.0,754670.96,750073.43,1978600.0,3483299.7,5681.0,584360.03,546162.81,1990900.0,3121400.0,5393.0,451311.36,417393.8,1849496.9,2718200.0


# Save Output

In [7]:
# Normalise each table's columns, then tag every row with its utility and
# customer class so both tables can share one output file.
arrearages = nwec.utility_reporting.arrearages.normalize_arrearage_cols(
    arrearages, NUM_MONTHS
).with_columns(
    pl.lit("PAC").alias("Utility"),
    pl.lit("Residential").alias("Customer Class"),
)

kli_arrearages = nwec.utility_reporting.arrearages.normalize_arrearage_cols(
    kli_arrearages, NUM_MONTHS
).with_columns(
    pl.lit("PAC").alias("Utility"),
    pl.lit("KLI").alias("Customer Class"),
)

# Stack the residential rows above the KLI rows and write them as Arrow IPC.
pl.concat([arrearages, kli_arrearages]).write_ipc(
    CLEAN_UTILITY_DATA / "pac.arrow"
)