In [None]:
import datetime

import polars as pl

from nwec.constants import DATA

# Reporting year; interpolated into the Avista workbook filename that the cells below read.
YEAR = 2023

# Number of Disconnections

In [None]:
# Load sheet 2 of the Avista workbook for YEAR. has_header=False keeps the sheet's
# own header row as data, so later cells locate and apply the headers themselves.
df = pl.read_excel(f"{DATA}/utility_reporting/avista/avista_{YEAR}.xlsx", sheet_id=2, has_header=False)

In [None]:
# Take the first (label) column plus the block of monthly disconnection columns,
# and skip the first row of the sheet so the header row becomes row 0.
# NOTE(review): the 2:15 range is a hard-coded position in this workbook — confirm it when YEAR changes.
disconnects = df.select([df.columns[0]] + df.columns[2:15]).slice(1)
# Drop columns whose header cell (row 0) is null, then cast all columns to strings
# so the header row can be used as column names.
null_columns = [col for col in disconnects.columns if disconnects[col][0] is None]
disconnects = disconnects.drop(null_columns)
disconnects = disconnects.with_columns([pl.col(col).cast(pl.Utf8) for col in disconnects.columns])
# Rename columns with proper headers
disconnects = disconnects.rename(disconnects.head(1).to_dicts()[0])
# Keep only the row(s) whose label contains "Residential Total".
disconnects = disconnects.filter(pl.col("Zip Code").str.contains("Residential Total"))
# Cast every column after "Zip Code" to integers. The slice starts at index 1 so the
# first month column is converted too (starting at 2 left it as a string).
# strict=False turns unparseable cells into nulls, which are then replaced with 0.
disconnects = disconnects.with_columns([pl.col(col).cast(pl.Int32, strict=False) for col in disconnects.columns[1:]])
disconnects = disconnects.fill_null(0)

In [None]:
# Display the filtered "Residential Total" disconnection row(s), one column per month.
disconnects

Zip Code,2023-01-01 00:00:00,2023-02-01 00:00:00,2023-03-01 00:00:00,2023-04-01 00:00:00,2023-05-01 00:00:00,2023-06-01 00:00:00,2023-07-01 00:00:00,2023-08-01 00:00:00,2023-09-01 00:00:00,2023-10-01 00:00:00,2023-11-01 00:00:00,2023-12-01 00:00:00
str,str,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32
"""Residential Totals""","""583""",806,1079,988,910,829,811,745,874,728,189,975


# Disconnection Notices

In [None]:
# Take the label column together with the block of monthly notice columns and
# skip the first row of the sheet, so the header row becomes row 0.
disconnect_notices = df.select([df.columns[0], *df.columns[41:53]]).slice(1)

# Columns whose header cell (row 0) is null carry no data: drop them, then turn
# every remaining column into strings so row 0 can serve as the column names.
null_columns = [name for name in disconnect_notices.columns if disconnect_notices[name][0] is None]
disconnect_notices = disconnect_notices.drop(null_columns).with_columns(pl.all().cast(pl.Utf8))

# Promote row 0 to column names and keep only the "Residential Total" row(s).
disconnect_notices = disconnect_notices.rename(disconnect_notices.row(0, named=True)).filter(
    pl.col("Zip Code").str.contains("Residential Total")
)

# Convert every column after "Zip Code" to integers; cells that cannot be parsed
# become null (strict=False) and are then replaced with 0.
disconnect_notices = disconnect_notices.with_columns(
    [pl.col(name).cast(pl.Int32, strict=False) for name in disconnect_notices.columns[1:]]
).fill_null(0)

In [None]:
# Display the filtered "Residential Total" disconnection-notice row(s), one column per month.
disconnect_notices

Zip Code,2023-01-01 00:00:00,2023-02-01 00:00:00,2023-03-01 00:00:00,2023-04-01 00:00:00,2023-05-01 00:00:00,2023-06-01 00:00:00,2023-07-01 00:00:00,2023-08-01 00:00:00,2023-09-01 00:00:00,2023-10-01 00:00:00,2023-11-01 00:00:00,2023-12-01 00:00:00
str,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32
"""Residential Totals""",6156,7039,8076,6259,5657,4585,3833,4638,4519,2999,4726,5373


# Bill Assistance (Payment Arrangements)

In [None]:
# Load sheet 4 of the Avista workbook for YEAR, keeping the header row as data.
# NOTE(review): nothing reads this frame before `df` is reassigned by the sheet 8
# load in the next section — this section looks unfinished. TODO: add the
# payment-arrangement extraction or remove this read.
df = pl.read_excel(f"{DATA}/utility_reporting/avista/avista_{YEAR}.xlsx", sheet_id=4, has_header=False)

# Number of Customers

In [None]:
# Load sheet 8 of the Avista workbook for YEAR. has_header=False keeps the header
# rows as data; the following cells search row 0 for the section title.
df = pl.read_excel(f"{DATA}/utility_reporting/avista/avista_{YEAR}.xlsx", sheet_id=8, has_header=False)

In [None]:
# For each column, flag whether its first-row cell contains the section title
# (case-insensitive match).
is_num_customers_section = (
    df.head(1).select(pl.all().str.contains(r"(?i)number of customers by customer class")).row(0)
)
# The first flagged column is where the section starts. If no cell matches,
# next() raises StopIteration.
num_customers_start_index = next(
    position for position, matched in enumerate(is_num_customers_section) if matched
)
# Keep the two leading label columns plus the 12 columns beginning at the section start.
num_customers = df.select(
    [*df.columns[:2], *df.columns[num_customers_start_index : num_customers_start_index + 12]]
)

In [None]:
# Build a rename map for the month columns from the row at offset 2 of the
# selection. slice(2, 2) yields two rows, but only the first one is used.
new_columns = num_customers.select(num_customers.columns[2:]).slice(2, 2).to_dicts()[0]
for col in new_columns:
    if isinstance(new_columns[col], str):
        # Attach UTC with replace() instead of astimezone(). astimezone() treats a naive
        # datetime as system-local time and converts it, so on a machine east of UTC
        # "YYYY-MM-01 00:00:00" would move to the previous day and get the wrong month label.
        new_columns[col] = datetime.datetime.strptime(new_columns[col], "%Y-%m-%d %H:%M:%S").replace(
            tzinfo=datetime.UTC
        )
    # Non-string cells are assumed to already be date/datetime objects — TODO confirm.
    new_columns[col] = new_columns[col].strftime("%B %Y")
# The two leading columns carry the zip code and customer class labels.
new_columns = {"column_1": "Zip Code", "column_2": "Customer Class"} | new_columns
# Apply the headers, keep only rows whose class is exactly "residential"
# (case-insensitive), then drop the class column.
num_customers = (
    num_customers.rename(new_columns)
    .filter(pl.col("Customer Class").str.contains(r"(?i)^residential$"))
    .drop("Customer Class")
)

In [None]:
# Convert the month columns to floats (unparseable cells become null with
# strict=False), then convert the zip codes to integers.
num_customers = num_customers.with_columns(
    [pl.col(name).cast(pl.Float64, strict=False) for name in num_customers.columns[1:]]
).with_columns(pl.col("Zip Code").cast(pl.Int64))
# Column-wise totals. The "Zip Code" column is summed as well, so that value is
# not meaningful; the month columns give the residential customer totals.
num_customers.sum()

Zip Code,January 2023,February 2023,March 2023,April 2023,May 2023,June 2023,July 2023,August 2023,September 2023,October 2023,November 2023,December 2023
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
10407700,25858.0,27463.0,27880.0,29804.0,28946.0,26189.0,28295.0,27303.0,29041.0,28220.0,27866.0,28616.0
