In [8]:
import polars as pl

import nwec.utility_reporting.arrearages
import nwec.utils.excel
from nwec.constants import CLEAN_UTILITY_DATA, RAW_UTILITY_DATA

# Reporting year; also the name of the raw-data subdirectory holding the workbook.
YEAR = 2023
# Number of monthly column groups handled by the arrearage helpers
# (the outputs further down cover months 10-12 of 2023).
NUM_MONTHS = 3
# Columns per month in the main table: amount and count for 30 / 60 / 90+ days
# plus the total, as seen in the displayed column names further down.
COLS_PER_MONTH = 8
# Columns per month in the known-low-income table: 30 / 60 / 90+ days and the total.
KLI_COLS_PER_MONTH = 4
# Text handed to get_sheet_index_from_name to pick the worksheet.
# NOTE(review): whether this is an exact or partial match is not visible here - confirm.
SHEET_SEARCH_STRING = "Past Due Balances"
# Text handed to get_arrearages_df for each table.
# NOTE(review): presumably the label that marks where each table starts in the sheet - confirm.
ARREARAGE_SEARCH_STRING = "Past-due balances by customer class"
KLI_SEARCH_STRING = "known low-income"
# Source workbook: <RAW_UTILITY_DATA>/<YEAR>/nwng.xlsx
spreadsheet = RAW_UTILITY_DATA / str(YEAR) / "nwng.xlsx"

In [9]:
# Resolve the worksheet from SHEET_SEARCH_STRING, then load it with
# has_header=False so the first row is read as data rather than column names.
sheet_index = nwec.utils.excel.get_sheet_index_from_name(
    spreadsheet,
    SHEET_SEARCH_STRING,
)
df = pl.read_excel(
    spreadsheet,
    sheet_id=sheet_index,
    has_header=False,
)

# Both tables are pulled from the same sheet; the calls differ only in the
# per-month column count and the search string.
extract_table = nwec.utility_reporting.arrearages.get_arrearages_df
arrearages = extract_table(df, NUM_MONTHS, COLS_PER_MONTH, ARREARAGE_SEARCH_STRING)
kli_arrearages = extract_table(df, NUM_MONTHS, KLI_COLS_PER_MONTH, KLI_SEARCH_STRING)

# Arrearages

In [10]:
# Arguments for combine_arrearage_year_vintage_cols.
# NOTE(review): the meaning of date_to_zip_offset is defined by the helper - confirm there.
date_to_zip_offset = 1
source_date_format = "%b %Y"  # strptime pattern: abbreviated month name, 4-digit year

# NOTE: this rebinds `arrearages` from itself, so re-running this cell without
# first re-running the extraction cell passes the already-combined frame back in.
combine_cols = nwec.utility_reporting.arrearages.combine_arrearage_year_vintage_cols
arrearages = combine_cols(
    arrearages,
    NUM_MONTHS,
    COLS_PER_MONTH,
    date_to_zip_offset,
    source_date_format,
)

In [11]:
# Column-wise totals of the combined table (presumably shown as a sanity check).
# NOTE(review): "Zip Code" is summed along with the value columns; that total is not meaningful.
arrearages.sum()

Zip Code,10 2023 30 Days Amt,10 2023 30 Day Count,10 2023 60 Days Amt,10 2023 60 Days Count,10 2023 90 Days + Amt,10 2023 90 Days + Count,10 2023 Total Arrearages,10 2023 Total Count,11 2023 30 Days Amt,11 2023 30 Day Count,11 2023 60 Days Amt,11 2023 60 Days Count,11 2023 90 Days + Amt,11 2023 90 Days + Count,11 2023 Total Arrearages,11 2023 Total Count,12 2023 30 Days Amt,12 2023 30 Day Count,12 2023 60 Days Amt,12 2023 60 Days Count,12 2023 90 Days + Amt,12 2023 90 Days + Count,12 2023 Total Arrearages,12 2023 Total Count
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
4234744,225033.18,3137.0,135911.31,1529.0,352300.93,3319.0,713245.42,7985.0,338254.77,3815.0,121470.93,1288.0,315504.22,3292.0,775229.92,8395.0,369149.31,2648.0,86793.01,803.0,118700.66,1379.0,574642.98,4830.0


# KLI (Known Low-Income) Arrearages

In [12]:
# Same argument values as used for the main arrearage table, restated for this cell.
# NOTE(review): the meaning of date_to_zip_offset is defined by the helper - confirm there.
date_to_zip_offset = 1
source_date_format = "%b %Y"  # strptime pattern: abbreviated month name, 4-digit year

# NOTE: this rebinds `kli_arrearages` from itself, so re-running this cell without
# first re-running the extraction cell passes the already-combined frame back in.
combine_cols = nwec.utility_reporting.arrearages.combine_arrearage_year_vintage_cols
kli_arrearages = combine_cols(
    kli_arrearages,
    NUM_MONTHS,
    KLI_COLS_PER_MONTH,
    date_to_zip_offset,
    source_date_format,
)

In [13]:
# Column-wise totals of the combined KLI table (presumably shown as a sanity check).
# NOTE(review): "Zip Code" is summed along with the value columns; that total is not meaningful.
kli_arrearages.sum()

Zip Code,10 2023 30 Days,10 2023 60 Days,10 2023 90 Days +,10 2023 Total Arrearages,11 2023 30 Days,11 2023 60 Days,11 2023 90 Days +,11 2023 Total Arrearages,12 2023 30 Days,12 2023 60 Days,12 2023 90 Days +,12 2023 Total Arrearages
i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
4234744,5454.0,4050.09,18321.71,27825.8,9431.94,4243.59,18468.75,32144.28,7513.77,2412.37,4046.26,13972.4


# Save Output

In [14]:
normalize = nwec.utility_reporting.arrearages.normalize_arrearage_cols

# Normalize each table, then tag it with the utility and its customer class.
# The two literal columns are appended in one with_columns call, in the same
# order as before: "Utility" first, "Customer Class" second.
arrearages = normalize(arrearages, NUM_MONTHS).with_columns(
    pl.lit("NWNG").alias("Utility"),
    pl.lit("Residential").alias("Customer Class"),
)
kli_arrearages = normalize(kli_arrearages, NUM_MONTHS).with_columns(
    pl.lit("NWNG").alias("Utility"),
    pl.lit("KLI").alias("Customer Class"),
)

# Stack both tables and write them to a single Arrow IPC file.
# NOTE(review): pl.concat defaults to a vertical stack, which needs matching
# schemas - presumably normalize_arrearage_cols guarantees that; confirm.
pl.concat([arrearages, kli_arrearages]).write_ipc(CLEAN_UTILITY_DATA / "nwng.arrow")