In [1]:
import polars as pl

import nwec.utility_reporting.arrearages
import nwec.utils.excel
from nwec.constants import CLEAN_UTILITY_DATA, RAW_UTILITY_DATA

# Reporting year; interpolated into the workbook filename below.
YEAR = 2024
# Number of monthly column groups handed to the arrearage helpers.
NUM_MONTHS = 12
# Columns per monthly group handed to the arrearage helpers. The combined frames
# show four per month: 30 Days, 60 Days, 90 Days +, Total Arrearages.
COLS_PER_MONTH = 4
# Text used to find the worksheet by name (passed to get_sheet_index_from_name).
# NOTE(review): the exact matching rules (case sensitivity, substring vs. full
# match) live in nwec.utils.excel -- confirm there.
SHEET_SEARCH_STRING = "past due balances"
# Search strings passed to get_arrearages_df to select the two tables read from
# that worksheet: the customer-class table and the known-low-income (KLI) table.
# NOTE(review): presumably matched against table titles in the sheet -- confirm.
ARREARAGE_SEARCH_STRING = "past-due balances by customer class"
KLI_SEARCH_STRING = "past-due balances for known low-income household"
# Location of the raw Avista workbook for YEAR, relative to RAW_UTILITY_DATA.
spreadsheet = RAW_UTILITY_DATA / f"avista/avista_{YEAR}.xlsx"

In [2]:
# Find the worksheet by name, then read it without a header row so that every
# cell of the sheet arrives as data for the helpers to search through.
sheet_index = nwec.utils.excel.get_sheet_index_from_name(spreadsheet, SHEET_SEARCH_STRING)
df = pl.read_excel(spreadsheet, has_header=False, sheet_id=sheet_index)

# Extract both tables from the same raw sheet with identical layout arguments:
# the customer-class table first, then the known-low-income (KLI) table.
arrearages, kli_arrearages = (
    nwec.utility_reporting.arrearages.get_arrearages_df(df, NUM_MONTHS, COLS_PER_MONTH, search_string)
    for search_string in (ARREARAGE_SEARCH_STRING, KLI_SEARCH_STRING)
)

# Arrearages

In [3]:
# Layout/format hints forwarded to the column-combining helper.
# NOTE(review): `date_to_zip_offset` presumably describes where the date header
# sits relative to the zip-code header in the raw table -- confirm in
# nwec.utility_reporting.arrearages.
date_to_zip_offset = 1
# strptime-style format of the date values as they appear in the source sheet.
source_date_format = "%Y-%m-%d %H:%M:%S"

# Short local alias for the long helper name; the call itself is unchanged.
# Note: this reassigns `arrearages`, so the cell is meant to run once per
# freshly loaded frame.
combine_cols = nwec.utility_reporting.arrearages.combine_arrearage_year_vintage_cols
arrearages = combine_cols(arrearages, NUM_MONTHS, COLS_PER_MONTH, date_to_zip_offset, source_date_format)

In [4]:
# Sanity check: column totals across all rows. The total shown under
# "Zip Code" is just the sum of the zip codes and carries no meaning.
# In the recorded output, months 07-12 total 0.0 in every column --
# presumably the source workbook held no data for them yet; confirm.
arrearages.sum()

Zip Code,01 2024 30 Days,01 2024 60 Days,01 2024 90 Days +,01 2024 Total Arrearages,02 2024 30 Days,02 2024 60 Days,02 2024 90 Days +,02 2024 Total Arrearages,03 2024 30 Days,03 2024 60 Days,03 2024 90 Days +,03 2024 Total Arrearages,04 2024 30 Days,04 2024 60 Days,04 2024 90 Days +,04 2024 Total Arrearages,05 2024 30 Days,05 2024 60 Days,05 2024 90 Days +,05 2024 Total Arrearages,06 2024 30 Days,06 2024 60 Days,06 2024 90 Days +,06 2024 Total Arrearages,07 2024 30 Days,07 2024 60 Days,07 2024 90 Days +,07 2024 Total Arrearages,08 2024 30 Days,08 2024 60 Days,08 2024 90 Days +,08 2024 Total Arrearages,09 2024 30 Days,09 2024 60 Days,09 2024 90 Days +,09 2024 Total Arrearages,10 2024 30 Days,10 2024 60 Days,10 2024 90 Days +,10 2024 Total Arrearages,11 2024 30 Days,11 2024 60 Days,11 2024 90 Days +,11 2024 Total Arrearages,12 2024 30 Days,12 2024 60 Days,12 2024 90 Days +,12 2024 Total Arrearages
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
10407700,1980200.0,1242000.0,2891600.0,6113800.0,2534000.0,1621400.0,2990600.0,7146000.0,3346000.0,2319500.0,3374400.0,9040000.0,2469800.0,2046855.3,3027000.0,7543700.0,2203400.0,1575000.0,2710900.0,6489200.0,1837900.0,1614900.0,2512300.0,5965100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# KLI Arrearages

In [5]:
# Same layout/format hints as used for the customer-class table, restated here
# so this section can be run on its own.
# NOTE(review): `date_to_zip_offset` presumably describes where the date header
# sits relative to the zip-code header in the raw table -- confirm in
# nwec.utility_reporting.arrearages.
date_to_zip_offset = 1
# strptime-style format of the date values as they appear in the source sheet.
source_date_format = "%Y-%m-%d %H:%M:%S"

# Short local alias for the long helper name; the call itself is unchanged.
# Note: this reassigns `kli_arrearages`, so the cell is meant to run once per
# freshly loaded frame.
combine_cols = nwec.utility_reporting.arrearages.combine_arrearage_year_vintage_cols
kli_arrearages = combine_cols(kli_arrearages, NUM_MONTHS, COLS_PER_MONTH, date_to_zip_offset, source_date_format)

In [6]:
# Sanity check: column totals across all rows of the KLI table. The total
# shown under "Zip Code" is just the sum of the zip codes and carries no meaning.
# In the recorded output, months 07-12 total 0.0 in every column --
# presumably the source workbook held no data for them yet; confirm.
kli_arrearages.sum()

Zip Code,01 2024 30 Days,01 2024 60 Days,01 2024 90 Days +,01 2024 Total Arrearages,02 2024 30 Days,02 2024 60 Days,02 2024 90 Days +,02 2024 Total Arrearages,03 2024 30 Days,03 2024 60 Days,03 2024 90 Days +,03 2024 Total Arrearages,04 2024 30 Days,04 2024 60 Days,04 2024 90 Days +,04 2024 Total Arrearages,05 2024 30 Days,05 2024 60 Days,05 2024 90 Days +,05 2024 Total Arrearages,06 2024 30 Days,06 2024 60 Days,06 2024 90 Days +,06 2024 Total Arrearages,07 2024 30 Days,07 2024 60 Days,07 2024 90 Days +,07 2024 Total Arrearages,08 2024 30 Days,08 2024 60 Days,08 2024 90 Days +,08 2024 Total Arrearages,09 2024 30 Days,09 2024 60 Days,09 2024 90 Days +,09 2024 Total Arrearages,10 2024 30 Days,10 2024 60 Days,10 2024 90 Days +,10 2024 Total Arrearages,11 2024 30 Days,11 2024 60 Days,11 2024 90 Days +,11 2024 Total Arrearages,12 2024 30 Days,12 2024 60 Days,12 2024 90 Days +,12 2024 Total Arrearages
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
10407700,359026.37,385153.04,1426800.0,2171000.0,418069.06,500612.85,1469200.0,2387800.0,423795.41,480086.37,1671500.0,2575400.0,294259.5,428951.27,1386400.0,2109600.0,247913.34,269823.11,1226600.0,1744300.0,181198.94,252652.83,1097700.0,1531600.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Save Output

In [7]:
# Normalize each table's columns, then tag every row with the utility name and
# the customer class it belongs to. Both label columns are added in one
# with_columns call, in the same order as before ("Utility", "Customer Class").
arrearages = nwec.utility_reporting.arrearages.normalize_arrearage_cols(arrearages, NUM_MONTHS).with_columns(
    pl.lit("Avista").alias("Utility"),
    pl.lit("Residential").alias("Customer Class"),
)
kli_arrearages = nwec.utility_reporting.arrearages.normalize_arrearage_cols(kli_arrearages, NUM_MONTHS).with_columns(
    pl.lit("Avista").alias("Utility"),
    pl.lit("KLI").alias("Customer Class"),
)

# Stack the residential rows on top of the KLI rows and persist the result as
# an Arrow IPC file in the clean-data directory.
combined = pl.concat([arrearages, kli_arrearages])
combined.write_ipc(CLEAN_UTILITY_DATA / "avista.arrow")