In [1]:
import datetime

import polars as pl

import nwec.utility_reporting.arrearages
import nwec.utils.excel
from nwec.constants import DATA

# Reporting year; interpolated into the workbook filename below.
YEAR = 2024
# Number of monthly groups in each arrearage section; used with COLS_PER_MONTH
# to size the column window sliced out of the sheet.
NUM_MONTHS = 3
# Number of columns reported per month; also used to map each vintage column
# back to its month when building readable column names.
COLS_PER_MONTH = 5
# Source workbook, resolved relative to DATA (imported from nwec.constants).
spreadsheet = DATA / f"utility_reporting/pac/pac_{YEAR}.xlsx"

In [2]:
# Resolve the sheet id for the worksheet named "Arrears" via the project helper.
arrearages_index = nwec.utils.excel.get_sheet_index_from_name(spreadsheet, "Arrears")
# Read the sheet without promoting any row to a header (has_header=False); the
# header rows are located and applied explicitly in later cells.
df = pl.read_excel(spreadsheet, sheet_id=arrearages_index, has_header=False)

# Arrearages

In [3]:
# Find where the "Past-due balances" section starts; only the second element of
# the helper's result is needed, and it is used as a position into df.columns.
_, arrearage_start_index = nwec.utils.excel.find_unpromoted_header(df, "Past-due balances")

# The section is NUM_MONTHS * COLS_PER_MONTH value columns wide, plus two extra
# columns reserved for the zip code and customer class.
arrearage_width = NUM_MONTHS * COLS_PER_MONTH + 2
arrearage_cols = df.columns[arrearage_start_index : arrearage_start_index + arrearage_width]
arrearages = nwec.utility_reporting.arrearages.normalize_zip_class_cols(df, df.select(arrearage_cols))

In [4]:
# Build a mapping from the frame's current column names to readable
# "<Month> <Year> <vintage>" labels, using two header rows that are still
# embedded in the data.
#
# NOTE(review): zip_index is the *column* position of "Zip Code" but is used
# below as a *row* offset. This relies on the month header row sitting at that
# same offset; when the column is missing the -1 fallback silently selects the
# last row instead of failing. Confirm this coupling is intended.
zip_index = next((i for i, s in enumerate(arrearages.columns) if s == "Zip Code"), -1)

# Row holding the month codes (one non-null "%Y%m" value per month group).
new_columns = arrearages.slice(zip_index, 1).to_dicts()[0]
# Following row holds the per-column vintage labels; the first two columns are skipped.
vintage_cols = arrearages.select(arrearages.columns[2:]).slice(zip_index + 1, 1).to_dicts()[0]

# Month codes in column order; cells that carry no month code are null.
months = [value for value in new_columns.values() if value is not None]

for counter, (col, vintage) in enumerate(vintage_cols.items()):
    # Every COLS_PER_MONTH consecutive vintage columns belong to the same month.
    current_month = months[counter // COLS_PER_MONTH]
    # Attach UTC with replace() rather than astimezone(): astimezone() on a naive
    # datetime assumes the machine's local zone and *shifts* the value, so on a
    # host east of UTC midnight on the 1st becomes the previous day and the
    # label would show the previous month.
    date = datetime.datetime.strptime(current_month, "%Y%m").replace(tzinfo=datetime.UTC)
    new_columns[col] = date.strftime("%B %Y") + " " + vintage

# The identifier columns keep their own names (this overwrites the null
# month-row entries for those keys).
new_columns = new_columns | {"Zip Code": "Zip Code", "Customer Class": "Customer Class"}

In [5]:
# Display the rename mapping to check the generated column labels.
new_columns

{'Customer Class': 'Customer Class',
 'Zip Code': 'Zip Code',
 'column_3': 'April 2024 COUNT',
 'column_4': 'April 2024 DAYS 31 60',
 'column_5': 'April 2024 DAYS 61 90',
 'column_6': 'April 2024 DAYS 91+',
 'column_7': 'April 2024 ARREARS',
 'column_8': 'May 2024 COUNT',
 'column_9': 'May 2024 DAYS 31 60',
 'column_10': 'May 2024 DAYS 61 90',
 'column_11': 'May 2024 DAYS 91+',
 'column_12': 'May 2024 ARREARS',
 'column_13': 'June 2024 COUNT',
 'column_14': 'June 2024 DAYS 31 60',
 'column_15': 'June 2024 DAYS 61 90',
 'column_16': 'June 2024 DAYS 91+',
 'column_17': 'June 2024 ARREARS'}

In [6]:
# Apply the readable labels, keep only rows whose customer class matches "res"
# (case-insensitive regex; presumably the residential class), then drop the
# class column since it is no longer needed.
is_residential = pl.col("Customer Class").str.contains(r"(?i)res")
arrearages = arrearages.rename(new_columns).filter(is_residential).drop(pl.col("Customer Class"))

# Every column after the first is cast to float with strict=False so that
# unparseable cells do not raise; the zip code is trimmed of spaces and cast
# to an integer (strict, so a malformed zip code will raise).
value_casts = [pl.col(name).cast(pl.Float64, strict=False) for name in arrearages.columns[1:]]
zip_as_int = pl.col("Zip Code").str.strip_chars(" ").cast(pl.Int64)
arrearages = arrearages.with_columns(value_casts).with_columns(zip_as_int)

In [7]:
# Display the cleaned arrearages frame (one row per zip code).
arrearages

Zip Code,April 2024 COUNT,April 2024 DAYS 31 60,April 2024 DAYS 61 90,April 2024 DAYS 91+,April 2024 ARREARS,May 2024 COUNT,May 2024 DAYS 31 60,May 2024 DAYS 61 90,May 2024 DAYS 91+,May 2024 ARREARS,June 2024 COUNT,June 2024 DAYS 31 60,June 2024 DAYS 61 90,June 2024 DAYS 91+,June 2024 ARREARS
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
98603,1.0,126.45,116.88,0.0,243.33,2.0,63.1,0.0,0.0,63.1,1.0,39.6,0.0,0.0,39.6
98901,2760.0,409035.13,297245.84,821727.99,1.5280e6,2633.0,284330.63,258781.16,895302.31,1438414.1,2484.0,225013.68,174372.83,906988.82,1.3064e6
98902,4597.0,474241.5,449773.91,870545.55,1.7946e6,4801.0,497963.82,282542.75,882692.74,1.6632e6,4879.0,398802.85,293721.44,822770.72,1.5153e6
98903,1853.0,289884.54,190875.71,589996.95,1070757.2,1765.0,220294.22,156895.55,552933.91,930123.68,1833.0,192436.04,133142.63,419754.29,745332.96
98904,,0.0,0.0,0.0,0.0,1.0,31.65,0.0,0.0,31.65,1.0,25.92,31.65,0.0,57.57
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
99350,7.0,1157.38,808.67,10.3,1976.35,8.0,897.79,393.88,127.57,1419.24,5.0,416.06,83.35,40.57,539.98
99360,59.0,2506.82,9976.62,118.24,12601.68,101.0,17216.37,864.21,3473.17,21553.75,85.0,9097.2,4688.32,3386.96,17172.48
99361,154.0,26330.88,14247.45,8347.04,48925.37,153.0,19130.32,13880.2,11539.08,44549.6,142.0,16131.84,9717.2,11800.04,37649.08
99362,2544.0,294348.54,153356.77,106156.84,553862.15,2368.0,200655.49,143298.63,133005.41,476959.53,2213.0,170758.31,100841.08,150835.84,422435.23


In [8]:
# Column-wise totals across all zip codes. The "Zip Code" column is summed as
# well; that figure is not a meaningful quantity and should be ignored.
arrearages.sum()

Zip Code,April 2024 COUNT,April 2024 DAYS 31 60,April 2024 DAYS 61 90,April 2024 DAYS 91+,April 2024 ARREARS,May 2024 COUNT,May 2024 DAYS 31 60,May 2024 DAYS 61 90,May 2024 DAYS 91+,May 2024 ARREARS,June 2024 COUNT,June 2024 DAYS 31 60,June 2024 DAYS 61 90,June 2024 DAYS 91+,June 2024 ARREARS
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
3566098,27842.0,3809600.0,2744742.6,6521200.0,13075000.0,26927.0,2924600.0,2158981.1,6542400.0,11626000.0,25707.0,2390300.0,1640400.0,5879600.0,9910300.0


# Known Low-Income (KLI) Arrearages

In [9]:
# Find where the "known low-income" section starts; only the second element of
# the helper's result is needed, and it is used as a position into df.columns.
_, kli_arrearage_start_index = nwec.utils.excel.find_unpromoted_header(df, "known low-income")

# The section is NUM_MONTHS * COLS_PER_MONTH value columns wide, plus two extra
# columns reserved for the zip code and customer class.
kli_arrearage_width = NUM_MONTHS * COLS_PER_MONTH + 2
kli_arrearage_cols = df.columns[kli_arrearage_start_index : kli_arrearage_start_index + kli_arrearage_width]
kli_arrearages = nwec.utility_reporting.arrearages.normalize_zip_class_cols(df, df.select(kli_arrearage_cols))

In [10]:
# Build a mapping from the KLI frame's current column names to readable
# "<Month> <Year> <vintage>" labels, using two header rows that are still
# embedded in the data.
#
# NOTE(review): zip_index is the *column* position of "Zip Code" but is used
# below as a *row* offset. This relies on the month header row sitting at that
# same offset; when the column is missing the -1 fallback silently selects the
# last row instead of failing. Confirm this coupling is intended.
zip_index = next((i for i, s in enumerate(kli_arrearages.columns) if s == "Zip Code"), -1)

# Row holding the month codes (one non-null "%Y%m" value per month group).
new_columns = kli_arrearages.slice(zip_index, 1).to_dicts()[0]
# Following row holds the per-column vintage labels; the first two columns are skipped.
vintage_cols = kli_arrearages.select(kli_arrearages.columns[2:]).slice(zip_index + 1, 1).to_dicts()[0]

# Month codes in column order; cells that carry no month code are null.
months = [value for value in new_columns.values() if value is not None]

for counter, (col, vintage) in enumerate(vintage_cols.items()):
    # Every COLS_PER_MONTH consecutive vintage columns belong to the same month.
    current_month = months[counter // COLS_PER_MONTH]
    # Attach UTC with replace() rather than astimezone(): astimezone() on a naive
    # datetime assumes the machine's local zone and *shifts* the value, so on a
    # host east of UTC midnight on the 1st becomes the previous day and the
    # label would show the previous month.
    date = datetime.datetime.strptime(current_month, "%Y%m").replace(tzinfo=datetime.UTC)
    new_columns[col] = date.strftime("%B %Y") + " " + vintage

# The identifier columns keep their own names (this overwrites the null
# month-row entries for those keys).
new_columns = new_columns | {"Zip Code": "Zip Code", "Customer Class": "Customer Class"}

In [11]:
# Apply the readable labels, keep only rows whose customer class matches "res"
# (case-insensitive regex; presumably the residential class), then drop the
# class column since it is no longer needed.
kli_is_residential = pl.col("Customer Class").str.contains(r"(?i)res")
kli_arrearages = kli_arrearages.rename(new_columns).filter(kli_is_residential).drop(pl.col("Customer Class"))

# Every column after the first is cast to float with strict=False so that
# unparseable cells do not raise; the zip code is trimmed of spaces and cast
# to an integer (strict, so a malformed zip code will raise).
kli_value_casts = [pl.col(name).cast(pl.Float64, strict=False) for name in kli_arrearages.columns[1:]]
kli_zip_as_int = pl.col("Zip Code").str.strip_chars(" ").cast(pl.Int64)
kli_arrearages = kli_arrearages.with_columns(kli_value_casts).with_columns(kli_zip_as_int)

In [12]:
# Column-wise totals across all zip codes for the KLI section. The "Zip Code"
# column is summed as well; that figure is not a meaningful quantity.
kli_arrearages.sum()

Zip Code,April 2024 COUNT,April 2024 DAYS 31 60,April 2024 DAYS 61 90,April 2024 DAYS 91+,April 2024 ARREARS,May 2024 COUNT,May 2024 DAYS 31 60,May 2024 DAYS 61 90,May 2024 DAYS 91+,May 2024 ARREARS,June 2024 COUNT,June 2024 DAYS 31 60,June 2024 DAYS 61 90,June 2024 DAYS 91+,June 2024 ARREARS
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
3170370,5825.0,754670.96,750073.43,1978600.0,3483299.7,5681.0,584360.03,546162.81,1990900.0,3121400.0,5393.0,451311.36,417393.8,1849496.9,2718200.0
