In [8]:
import polars as pl

import nwec.utility_reporting.arrearages
import nwec.utils.excel
from nwec.constants import CLEAN_UTILITY_DATA, RAW_UTILITY_DATA

# Layout of the workbook being parsed.
YEAR = 2023
NUM_MONTHS = 3  # three monthly groups appear in the outputs below (10, 11, 12 2023)
COLS_PER_MONTH = 5  # COUNT, DAYS 31 60, DAYS 61 90, DAYS 91+, ARREARS

# Search strings handed to the nwec helpers: the first is used to pick the
# worksheet, the other two are passed to get_arrearages_df (one per table).
SHEET_SEARCH_STRING = "Arrears"
ARREARAGE_SEARCH_STRING = "Past-due balances"
KLI_SEARCH_STRING = "known low-income"

# Raw workbook for the reporting year, stored under a per-year directory.
spreadsheet = RAW_UTILITY_DATA / f"{YEAR}" / "pac.xlsx"

In [9]:
# Look up the worksheet via SHEET_SEARCH_STRING and read it without a header
# row, so the first spreadsheet row is kept as data rather than column names.
sheet_index = nwec.utils.excel.get_sheet_index_from_name(spreadsheet, SHEET_SEARCH_STRING)
df = pl.read_excel(spreadsheet, sheet_id=sheet_index, has_header=False)

# The same raw sheet is passed to get_arrearages_df twice, once per search
# string: all past-due balances, then the known low-income (KLI) table.
extract_table = nwec.utility_reporting.arrearages.get_arrearages_df
arrearages = extract_table(df, NUM_MONTHS, COLS_PER_MONTH, ARREARAGE_SEARCH_STRING)
kli_arrearages = extract_table(df, NUM_MONTHS, COLS_PER_MONTH, KLI_SEARCH_STRING)

# Arrearages

In [10]:
# Parsing parameters for the source sheet.
# NOTE(review): "%Y%m" presumably matches month labels such as 202310, and the
# offset presumably locates the date cell relative to the zip column -- confirm
# against the workbook and combine_arrearage_year_vintage_cols.
source_date_format = "%Y%m"
date_to_zip_offset = 1

# NOTE: this rebinds `arrearages`, so re-running the cell would feed the
# already-combined frame back into the helper. Restart and run all instead.
combine_vintage_cols = nwec.utility_reporting.arrearages.combine_arrearage_year_vintage_cols
arrearages = combine_vintage_cols(
    arrearages,
    NUM_MONTHS,
    COLS_PER_MONTH,
    date_to_zip_offset,
    source_date_format,
)

In [11]:
# Column totals as a quick sanity check on the parsed table. The Zip Code
# column is summed along with the rest; that figure is not a meaningful total.
arrearages.sum()

Zip Code,10 2023 COUNT,10 2023 DAYS 31 60,10 2023 DAYS 61 90,10 2023 DAYS 91+,10 2023 ARREARS,11 2023 COUNT,11 2023 DAYS 31 60,11 2023 DAYS 61 90,11 2023 DAYS 91+,11 2023 ARREARS,12 2023 COUNT,12 2023 DAYS 31 60,12 2023 DAYS 61 90,12 2023 DAYS 91+,12 2023 ARREARS
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
3566098,26654.0,2658300.0,1911600.0,6850700.0,11421000.0,26220.0,2267700.0,1520200.0,6853500.0,10642000.0,23822.0,2661200.0,1086800.0,6760100.0,10508000.0


# KLI Arrearages

In [12]:
# Same parsing parameters as the all-customer table above; they are set again
# here so this cell does not rely on the earlier one having run.
source_date_format = "%Y%m"
date_to_zip_offset = 1

# NOTE: this rebinds `kli_arrearages`, so re-running the cell would feed the
# already-combined frame back into the helper. Restart and run all instead.
combine_vintage_cols = nwec.utility_reporting.arrearages.combine_arrearage_year_vintage_cols
kli_arrearages = combine_vintage_cols(
    kli_arrearages,
    NUM_MONTHS,
    COLS_PER_MONTH,
    date_to_zip_offset,
    source_date_format,
)

In [13]:
# Column totals for the KLI table as a quick sanity check. The Zip Code
# column is summed along with the rest; that figure is not a meaningful total.
kli_arrearages.sum()

Zip Code,10 2023 COUNT,10 2023 DAYS 31 60,10 2023 DAYS 61 90,10 2023 DAYS 91+,10 2023 ARREARS,11 2023 COUNT,11 2023 DAYS 31 60,11 2023 DAYS 61 90,11 2023 DAYS 91+,11 2023 ARREARS,12 2023 COUNT,12 2023 DAYS 31 60,12 2023 DAYS 61 90,12 2023 DAYS 91+,12 2023 ARREARS
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
3170370,4753.0,411087.59,382778.86,1613500.0,2407300.0,4975.0,407395.37,323422.86,1659000.0,2389800.0,4773.0,506641.0,268497.22,1672900.0,2448100.0


# Save Output

In [14]:
# Normalize each table, then tag every row with the utility and the customer
# class it belongs to so the two tables can be told apart once stacked.
# NOTE: both names are rebound here, so re-running this cell would pass
# already-normalized frames back into normalize_arrearage_cols.
arrearages = (
    nwec.utility_reporting.arrearages.normalize_arrearage_cols(arrearages, NUM_MONTHS)
    .with_columns(pl.lit("PAC").alias("Utility"))
    .with_columns(pl.lit("Residential").alias("Customer Class"))
)

kli_arrearages = (
    nwec.utility_reporting.arrearages.normalize_arrearage_cols(kli_arrearages, NUM_MONTHS)
    .with_columns(pl.lit("PAC").alias("Utility"))
    .with_columns(pl.lit("KLI").alias("Customer Class"))
)

# Stack both tables and write them as a single Arrow IPC file.
output_path = CLEAN_UTILITY_DATA / "pac.arrow"
pl.concat([arrearages, kli_arrearages]).write_ipc(output_path)