In [102]:
import pandas as pd

# Alternative 1 (disabled): read the "Billing" sheet straight from Google Sheets as CSV
# url = f"https://docs.google.com/spreadsheets/d/1GuHkRddZm5Idxfa-IUlA-lXQFydduyau/gviz/tq?tqx=out:csv&sheet=Billing"
# df = pd.read_csv(url)

# Alternative 2 (used): download the spreadsheet once as an Excel file.
# Quote the URL so the shell does not treat "&" as a command separator:
# curl -L "https://docs.google.com/spreadsheets/d/1GuHkRddZm5Idxfa-IUlA-lXQFydduyau/export?format=xlsx&sheet=Billing" --output billing.xlsx

# Read billing.xlsx with the following layout:
# - the first line is blank and is ignored
# - lines 2, 3, 4, 5 and 6 are header rows, so the columns become a 5-level MultiIndex
df_excel_billing = pd.read_excel(
    "billing.xlsx",
    header=list(range(1, 6)),  # 0-based rows 1..5 == spreadsheet lines 2..6
)

df_excel_billing

Unnamed: 0_level_0,Which Unit,Dates,Dates,303 Main,303 Main,303 Main,303 Main,303 Main,303 Main,303 Main,...,303A ADU,303A ADU,303A ADU,303A ADU,303A ADU,303A ADU,303A ADU,303A ADU,303A ADU,303A ADU
Unnamed: 0_level_1,Table header,Dates,Dates,Meter Export Energy (Solar) \n(channel 2),Meter Export Energy (Solar) \n(channel 2),Meter Export Energy (Solar) \n(channel 2),Meter Import Energy\n(channel 1),Meter Import Energy\n(channel 1),Meter Import Energy\n(channel 1),Allocated Export Credits,...,PCE - Peninsula Clean Energy,PCE - Peninsula Clean Energy,PCE - Peninsula Clean Energy,PCE - Peninsula Clean Energy,PCE - Peninsula Clean Energy,PCE - Peninsula Clean Energy,PCE - Peninsula Clean Energy,PG&E - Pacific Gas & Electric,Other,Other
Unnamed: 0_level_2,Which PDF to look at for data?,Bill / Detail of Bill,Bill,n/a - calculated,n/a - calculated,Detail of Bill,Detail of Bill,Detail of Bill,Detail of Bill,Detail of Bill,...,Bill,Bill,Bill,Bill,Bill,Bill,Bill,Bill,Unnamed: 55_level_2,Unnamed: 56_level_2
Unnamed: 0_level_3,What page to look at?,Generation Charges / Page 1,Header,Unnamed: 3_level_3,Unnamed: 4_level_3,Page 3,Page 5,Page 5,Page 3,Page 5,...,Generation Charges,Generation Charges,Generation Charges,Generation Charges,Generation Charges,Generation Charges,Generation Charges,Delivery Charges,Unnamed: 55_level_3,Unnamed: 56_level_3
Unnamed: 0_level_4,Table subheader,Service End Date [Date],Billing Date [Date]\n(43),off peak [kWh]\n(1),peak [kWh]\n(2),total [kWh]\n(15),off peak [kWh]\n(3),peak [kWh]\n(4),total [kWh]\n(10),off peak [kWh]\n(5),...,PCE - peak cost \n[$]\n(25),PCE - Energy Cost Total\n[$],PCE - Net Generation Bonus\n[$]\n(26),Energy Commission Surcharge\n[$]\n(27),PCE - Total Energy Charges\n[$],PCE NEM Credit\n[$]\n(28),PCE Generation Charges due in Cash \n[$]\n(29),PG&E Electric Delivery Charges [$]\n(42),California Climate Credit\n[$],Total Bill in Mail Monthly\n[$]
0,,2024-05-07,2024-05-14 00:00:00,-884.0,-114.0,-998.0,294.0,88.0,382.0,-435.0,...,5.83791,-13.1745,-1.51,0,-14.6845,14.68,0,7.83,,7.83
1,,2024-06-06,2024-06-13 00:00:00,-1411.0,-190.0,-1601.0,330.0,121.0,451.0,-704.0,...,0.77081,-45.16649,-3.64,0,-48.80649,63.49,0,11.75,,11.75
2,,NaT,winter,,,,273.0,104.0,,-573.0,...,1.94597,,,,,,,,,
3,,NaT,summer,,,,57.0,17.0,,-131.0,...,-1.17516,,,,,,,,,
4,,2024-07-08,2024-07-15 00:00:00,-1547.0,-203.0,-1750.0,393.0,250.0,643.0,-1001.0,...,-3.72134,-40.33064,-2.89,0,-43.22064,106.71,0,12.53,,12.53
5,,NaT,07 - earlier period,,,,279.0,180.0,,-775.0,...,,,,,,,,,,
6,,NaT,07 - later period,,,,114.0,70.0,,-226.0,...,,,,,,,,,,
7,,2024-08-06,2024-08-16 00:00:00,-1214.0,-178.0,-1392.0,224.0,158.0,382.0,-491.0,...,5.114684,-36.523124,-3.14,0,-39.663124,146.37,0,11.36,,11.36
8,,NaT,08 - old rates,,,,,,,,...,4.03877,,,`,`,`,`,`,,
9,,NaT,08 - new rates,,,,,,,,...,1.075914,,,,,,,,,


In [51]:
from dataclasses import dataclass
from typing import List, Optional
from enum import Enum
import datetime

class NEM2A_MeterType(Enum):
    """Role of a meter within the NEM2A system.

    MeterBillingMonth uses this value to decide where each metric is found,
    because the locations differ between the two kinds of meter.
    """
    GenerationMeter = 1  # used for the `main` meter in NEM2AAggregationBillingMonth
    BenefitMeter = 2     # used for the `adu` meter; its export metrics are fixed at 0

class WhereFrom(Enum):
    """Kind of source a value comes from; stored in WhereFound.where_from."""
    PDF_BILL = 1            # read from the bill PDF
    PDF_DETAIL_OF_BILL = 2  # read from the "Detail of Bill" PDF
    CALCULATED = 3          # not read from a PDF; presumably derived from other values
    FIXED_VALUE = 4         # constant set in code (e.g. 0 export on a benefit meter)
    NOT_PROVIDED = 5        # no source recorded (used for PG&E charges on the benefit meter)


# Stores information about where something is found in the PG&E billing system.
# It could be for many types of information, such as a date, a kWh value, or a cost.
@dataclass
class WhereFound:
    """One location where a value can be found.

    All fields are optional and default to None, so an entry may carry only a
    source kind (e.g. a calculated value has no PDF section or number code).
    """
    where_from: Optional[WhereFrom] = None    # which kind of source holds the value (bill PDF, detail-of-bill PDF, calculated, ...)
    where_on_pdf: Optional[str] = None        # section or page within that PDF, e.g. "Header", "Generation Charges", "Page 3"
    kevins_number_code: Optional[int] = None  # a number that Kevin can use to identify this metric by looking at the November 2024 sample bill

# A date value paired with the place(s) where it can be read in the PG&E billing system.
class EnergyDate:
    """A date plus the list of WhereFound entries describing where to find it.

    Args:
        value: The date itself, or None while it has not been filled in.
        where_found: Locations of the date. When omitted, the instance gets
            its own new empty list; a supplied list is stored as-is.
    """
    value: Optional[datetime.date] = None
    where_found: List[WhereFound] = None

    def __init__(self, value: Optional[datetime.date] = None, where_found: Optional[List[WhereFound]] = None):
        # Build the fallback list here rather than in the signature so that
        # instances never share one default list.
        locations = [] if where_found is None else where_found
        self.value = value
        self.where_found = locations

# An energy or cost metric: its value(s), its unit, and where it is found.
class EnergyMetric:
    """A single metric made of one or more subcomponent values.

    Args:
        unit: Unit of the metric, such as "kWh" or "$".
    """

    # A metric is sometimes the sum of several values. This can happen when
    # the rates change halfway through the month (due to summer/winter
    # transitions or rate hikes).
    subcomponent_values: List[float] = None

    # The unit of the metric, such as kWh or $
    unit: Optional[str] = None

    # Where this metric is found in the PG&E billing system.
    where_found: List[WhereFound] = None

    def __init__(self, unit: Optional[str] = None):
        # Each instance starts with its own empty value list and location list.
        self.subcomponent_values, self.unit, self.where_found = [], unit, []

# Time-of-use (TOU) metrics carry separate values for peak and off-peak times, plus a total.
class EnergyMetricTOU:
    """Three EnergyMetric objects (peak, off_peak, total) sharing one unit.

    Args:
        unit: Unit passed to each of the three metrics, such as "kWh" or "$".
    """
    peak: EnergyMetric
    off_peak: EnergyMetric
    total: EnergyMetric

    # Creates the three metrics; each starts with no values and an empty where_found list.
    def __init__(self, unit: Optional[str]):
        # Attribute order (peak, off_peak, total) is kept because code that
        # walks __dict__ reports the metrics in this order.
        for band in ("peak", "off_peak", "total"):
            setattr(self, band, EnergyMetric(unit))

# Represents a monthly bill for a single meter within the NEM2A system.
class MeterBillingMonth:
    """One month of billing data for a single meter within the NEM2A system.

    Every field is an EnergyDate, EnergyMetric or EnergyMetricTOU whose
    ``where_found`` list records where the value is located (bill PDF,
    detail-of-bill PDF, calculated, fixed value or not provided). The
    locations differ between the generation meter and the benefit meter, so
    the constructor fills them in from ``nem2a_meter_type``.

    Values themselves start empty, except for the export metrics of a benefit
    meter, which are fixed at 0.
    """

    # key dates
    billing_date                    : EnergyDate
    service_end_date                : EnergyDate

    # Meter values (kWh) are always mapped to time of use.
    energy_export_meter_channel_2   : EnergyMetricTOU
    energy_import_meter_channel_1   : EnergyMetricTOU
    allocated_export_credits        : EnergyMetricTOU
    net_energy_usage_after_credits  : EnergyMetricTOU

    # PCE values
    pce_energy_cost                 : EnergyMetricTOU # cost is always TOU based
    pce_net_generation_bonus        : EnergyMetric
    pce_energy_commission_surcharge : EnergyMetric
    pce_total_energy_charges        : EnergyMetric
    pce_nem_credit                  : EnergyMetric
    pce_generation_charges_due_cash : EnergyMetric

    # PG&E
    pge_res_energy_charges          : EnergyMetric
    pge_baseline_credit             : EnergyMetric
    pge_da_cca_charges              : EnergyMetric
    pge_total_energy_charges        : EnergyMetric
    pge_nem_billing                 : EnergyMetric
    pge_minimum_delivery_charge     : EnergyMetric
    pge_nem_true_up_adjustment      : EnergyMetric
    pge_electric_delivery_charges   : EnergyMetric

    # Totals
    california_climate_credit       : EnergyMetric
    total_bill_in_mail              : EnergyMetric

    @staticmethod
    def _on_bill(section: Optional[str] = None, code: Optional[int] = None) -> WhereFound:
        # Location on the bill PDF.
        return WhereFound(where_from=WhereFrom.PDF_BILL, where_on_pdf=section, kevins_number_code=code)

    @staticmethod
    def _on_detail(page: Optional[str] = None, code: Optional[int] = None) -> WhereFound:
        # Location on the "Detail of Bill" PDF.
        return WhereFound(where_from=WhereFrom.PDF_DETAIL_OF_BILL, where_on_pdf=page, kevins_number_code=code)

    @staticmethod
    def _marker(where_from: WhereFrom) -> WhereFound:
        # Entry with only a source kind (calculated / fixed value / not provided).
        return WhereFound(where_from=where_from)

    # constructor which sets the where found based on the nem2a meter type
    def __init__(self, nem2a_meter_type: NEM2A_MeterType = NEM2A_MeterType.GenerationMeter):
        """Create empty metrics and record where each one is found.

        Args:
            nem2a_meter_type: Kind of meter this bill belongs to. For a value
                that is neither GenerationMeter nor BenefitMeter, only the
                locations shared by both meters are recorded.
        """
        bill = self._on_bill
        detail = self._on_detail
        marker = self._marker
        generation_charges = "Generation Charges"

        is_generation = nem2a_meter_type == NEM2A_MeterType.GenerationMeter
        is_benefit = nem2a_meter_type == NEM2A_MeterType.BenefitMeter

        # NOTE: attributes are created in a fixed order on purpose; code that
        # walks __dict__ (e.g. to list all locations) reports them in this order.

        # key dates are the same for both types of meters
        self.billing_date        = EnergyDate(where_found=[bill("Header", 43)])
        self.service_end_date    = EnergyDate(where_found=[bill(generation_charges),
                                                           detail("Page 1", 44)])

        # Metering units are always kWh
        self.energy_export_meter_channel_2   = EnergyMetricTOU("kWh")
        self.energy_import_meter_channel_1   = EnergyMetricTOU("kWh")
        self.allocated_export_credits        = EnergyMetricTOU("kWh")
        self.net_energy_usage_after_credits  = EnergyMetricTOU("kWh")

        # PCE values always in $
        self.pce_energy_cost                 = EnergyMetricTOU("$")
        self.pce_net_generation_bonus        = EnergyMetric(unit = "$")
        self.pce_energy_commission_surcharge = EnergyMetric(unit = "$")
        self.pce_total_energy_charges        = EnergyMetric(unit = "$")
        self.pce_nem_credit                  = EnergyMetric(unit = "$")
        self.pce_generation_charges_due_cash = EnergyMetric(unit = "$")

        # PG&E values always in $
        self.pge_res_energy_charges          = EnergyMetric(unit = "$")
        self.pge_baseline_credit             = EnergyMetric(unit = "$")
        self.pge_da_cca_charges              = EnergyMetric(unit = "$")
        self.pge_total_energy_charges        = EnergyMetric(unit = "$")
        self.pge_nem_billing                 = EnergyMetric(unit = "$")
        self.pge_minimum_delivery_charge     = EnergyMetric(unit = "$")
        self.pge_nem_true_up_adjustment      = EnergyMetric(unit = "$")
        self.pge_electric_delivery_charges   = EnergyMetric(unit = "$")

        # Totals values always in $
        self.california_climate_credit       = EnergyMetric(unit = "$")
        self.total_bill_in_mail              = EnergyMetric(unit = "$")

        # METER CHANNEL 2: energy exported
        export = self.energy_export_meter_channel_2
        if is_generation:
            export.off_peak.where_found.append(marker(WhereFrom.CALCULATED))
            export.peak.where_found.append(marker(WhereFrom.CALCULATED))
            export.total.where_found.append(detail("Page 3", 15))
        elif is_benefit:
            # Never any export on a benefit meter: every band is a fixed 0.
            for band in (export.off_peak, export.peak, export.total):
                band.where_found.append(marker(WhereFrom.FIXED_VALUE))
                band.subcomponent_values = [0]

        # METER CHANNEL 1: energy imported
        imported = self.energy_import_meter_channel_1
        if is_generation:
            imported.off_peak.where_found.append(detail("Page 5", 3))
            imported.peak.where_found.append(detail("Page 5", 4))
            imported.total.where_found.append(detail("Page 3", 10))
        elif is_benefit:
            imported.off_peak.where_found.append(marker(WhereFrom.CALCULATED))
            imported.peak.where_found.append(marker(WhereFrom.CALCULATED))
            imported.total.where_found.append(detail("Page 3", 9))

        # ALLOCATED CREDITS
        credits = self.allocated_export_credits
        if is_generation:
            credits.off_peak.where_found.append(detail("Page 5", 5))
            # NOTE(review): the spreadsheet header for this column reads
            # "peak [kWh] (6)", so 16 may be a typo for 6 - confirm against the sample bill.
            credits.peak.where_found.append(detail("Page 5", 16))
            credits.total.where_found.append(detail("Page 3", 14))
        elif is_benefit:
            credits.off_peak.where_found.append(marker(WhereFrom.CALCULATED))
            credits.peak.where_found.append(marker(WhereFrom.CALCULATED))
            credits.total.where_found.append(detail("Page 3", 13))

        # net energy usage after credits
        net_usage = self.net_energy_usage_after_credits
        if is_generation:
            # off peak found in 2 places
            net_usage.off_peak.where_found.append(bill(generation_charges, 23))
            net_usage.off_peak.where_found.append(detail("Page 6", 23))
            # peak found in 2 places
            net_usage.peak.where_found.append(bill(generation_charges, 22))
            net_usage.peak.where_found.append(detail("Page 6", 22))
            # total found in 1 place
            net_usage.total.where_found.append(detail("Page 6", 7))
        elif is_benefit:
            net_usage.off_peak.where_found.append(bill(generation_charges, 21))
            net_usage.peak.where_found.append(bill(generation_charges, 20))
            net_usage.total.where_found.append(detail("Page 6", 8))

        # pce energy cost
        # These locations belong to pce_energy_cost itself. Recording them on
        # allocated_export_credits would leave the cost metric with no location
        # and give the credits metric extra, unrelated entries.
        energy_cost = self.pce_energy_cost
        if is_generation:
            energy_cost.off_peak.where_found.append(bill(generation_charges, 33))
            energy_cost.peak.where_found.append(bill(generation_charges, 34))
            energy_cost.total.where_found.append(marker(WhereFrom.CALCULATED))
        elif is_benefit:
            energy_cost.off_peak.where_found.append(bill(generation_charges, 24))
            energy_cost.peak.where_found.append(bill(generation_charges, 25))
            energy_cost.total.where_found.append(marker(WhereFrom.CALCULATED))

        # other pce costs: all in the "Generation Charges" section of the bill;
        # only the number code differs per meter (None = no code recorded).
        other_pce_costs = [
            # (metric,                              generation code, benefit code)
            (self.pce_net_generation_bonus,         37,              26),
            (self.pce_energy_commission_surcharge,  35,              27),
            (self.pce_total_energy_charges,         None,            None),
            (self.pce_nem_credit,                   None,            28),
            (self.pce_generation_charges_due_cash,  None,            29),
        ]
        for metric, generation_code, benefit_code in other_pce_costs:
            if is_generation:
                metric.where_found.append(bill(generation_charges, generation_code))
            elif is_benefit:
                metric.where_found.append(bill(generation_charges, benefit_code))

        # pg&e costs that are only found on generation meter side.
        # TODO: figure out how to calculate these for benefit meter after true up.
        generation_only_charges = [
            (self.pge_res_energy_charges,       "Page 4",                31),
            (self.pge_baseline_credit,          "Page 4",                32),
            (self.pge_da_cca_charges,           "Page 4",                38),
            (self.pge_total_energy_charges,     "Page 4",                39),
            (self.pge_nem_billing,              "Page 4",                40),
            (self.pge_minimum_delivery_charge,  "Page 4",                41),
            # NOTE(review): code 40 is also used for pge_nem_billing above -
            # confirm whether the true-up adjustment should have its own code.
            (self.pge_nem_true_up_adjustment,   "Page 1 - True Up Only", 40),
        ]
        for metric, page, code in generation_only_charges:
            if is_generation:
                metric.where_found.append(detail(page, code))
            elif is_benefit:
                metric.where_found.append(marker(WhereFrom.NOT_PROVIDED))

        delivery = self.pge_electric_delivery_charges
        if is_generation:
            delivery.where_found.append(bill("Electric Delivery Charges", 30))
            delivery.where_found.append(detail("Page 4", 30))
        elif is_benefit:
            delivery.where_found.append(bill("Electric Delivery Charges", 42))

        # on the bill for both meter types; no section or number code recorded
        self.california_climate_credit.where_found.append(bill())

        # total bill is calculated
        self.total_bill_in_mail.where_found.append(marker(WhereFrom.CALCULATED))

# Represents one billing month for the whole NEM2A aggregation: both meters together.
class NEM2AAggregationBillingMonth:
    """Pairs the monthly bill of the main meter with that of the ADU meter.

    ``main`` is created as a generation meter and ``adu`` as a benefit meter.
    """

    main    : MeterBillingMonth
    adu     : MeterBillingMonth

    def __init__(self):
        generation, benefit = NEM2A_MeterType.GenerationMeter, NEM2A_MeterType.BenefitMeter
        # `main` is assigned before `adu`; code that walks __dict__ reports them in this order.
        self.main = MeterBillingMonth(generation)
        self.adu = MeterBillingMonth(benefit)

In [52]:
from typing import Any

# Walks an object tree and collects every WhereFound entry together with its attribute path
def serialize_wherefound(obj: Any, path: str = ""):
    """Return one dict per WhereFound reachable from ``obj``.

    Lists are descended by index (``path[0]``) and any object with a
    ``__dict__`` by attribute name (``path.attr``); everything else is ignored.

    Args:
        obj: A WhereFound, a list, or an object whose attributes are searched.
        path: Path of ``obj`` relative to the root; empty for the root itself.

    Returns:
        A list of dicts with the keys "path", "where_from" (the enum member's
        name, or None), "where_on_pdf" and "kevins_number_code", in the order
        the entries are encountered.
    """
    # Leaf: a WhereFound becomes a single record.
    if isinstance(obj, WhereFound):
        source = obj.where_from
        return [{
            "path": path,
            "where_from": source.name if source else None,
            "where_on_pdf": obj.where_on_pdf,
            "kevins_number_code": obj.kevins_number_code
        }]

    # Containers: work out the (path, child) pairs to descend into.
    if isinstance(obj, list):
        children = [(f"{path}[{position}]", element) for position, element in enumerate(obj)]
    elif hasattr(obj, "__dict__"):
        children = [(f"{path}.{name}" if path else name, member) for name, member in obj.__dict__.items()]
    else:
        # Plain values (numbers, strings, None, ...) hold no WhereFound entries.
        return []

    collected = []
    for child_path, child in children:
        collected.extend(serialize_wherefound(child, child_path))
    return collected


# One stand-alone benefit-meter bill, and one full billing month with both meters
sample = MeterBillingMonth(NEM2A_MeterType.BenefitMeter)
april2025 = NEM2AAggregationBillingMonth()

# Flatten every WhereFound entry of the billing month into a list of dicts
serialized = serialize_wherefound(april2025)

# Show and count only the entries that come from PDF_BILL or PDF_DETAIL_OF_BILL
pdf_source_names = (WhereFrom.PDF_BILL.name, WhereFrom.PDF_DETAIL_OF_BILL.name)
count = 0
for item in serialized:
    if item["where_from"] not in pdf_source_names:
        continue
    print(item)
    count += 1

# Print the count of values found
print("Count of values found:", count)


{'path': 'main.billing_date.where_found[0]', 'where_from': 'PDF_BILL', 'where_on_pdf': 'Header', 'kevins_number_code': 43}
{'path': 'main.service_end_date.where_found[0]', 'where_from': 'PDF_BILL', 'where_on_pdf': 'Generation Charges', 'kevins_number_code': None}
{'path': 'main.service_end_date.where_found[1]', 'where_from': 'PDF_DETAIL_OF_BILL', 'where_on_pdf': 'Page 1', 'kevins_number_code': 44}
{'path': 'main.energy_export_meter_channel_2.total.where_found[0]', 'where_from': 'PDF_DETAIL_OF_BILL', 'where_on_pdf': 'Page 3', 'kevins_number_code': 15}
{'path': 'main.energy_import_meter_channel_1.peak.where_found[0]', 'where_from': 'PDF_DETAIL_OF_BILL', 'where_on_pdf': 'Page 5', 'kevins_number_code': 4}
{'path': 'main.energy_import_meter_channel_1.off_peak.where_found[0]', 'where_from': 'PDF_DETAIL_OF_BILL', 'where_on_pdf': 'Page 5', 'kevins_number_code': 3}
{'path': 'main.energy_import_meter_channel_1.total.where_found[0]', 'where_from': 'PDF_DETAIL_OF_BILL', 'where_on_pdf': 'Page 3', '

In [144]:
#import re
import re
import json

# Billing months covered, in order: May 2024 through April 2025
months = [
    "May", "June", "July", "August", "September", "October",
    "November", "December", "January", "February", "March", "April",
]

# One empty NEM2AAggregationBillingMonth object per month
Billing2024to2025 = [NEM2AAggregationBillingMonth() for _month in months]

# Maps each (#) number code found in a column header to that column's name (a tuple of header levels)
number_code_pattern = re.compile(r"\((\d+)\)")
headers_by_number_code = {}

for col in df_excel_billing.columns:
    # The search runs over the text of the whole column tuple, so the first
    # integer in parentheses, like (#), in any header level is used.
    match = number_code_pattern.search(str(col))
    if match is None:
        continue
    # Key: the number; value: the full column name
    headers_by_number_code[int(match.group(1))] = col

headers_by_number_code

{43: ('Dates', 'Dates', 'Bill', 'Header', 'Billing Date [Date]\n(43)'),
 1: ('303 Main',
  'Meter Export Energy (Solar) \n(channel 2)',
  'n/a - calculated',
  'Unnamed: 3_level_3',
  'off peak [kWh]\n(1) '),
 2: ('303 Main',
  'Meter Export Energy (Solar) \n(channel 2)',
  'n/a - calculated',
  'Unnamed: 4_level_3',
  'peak [kWh]\n(2)'),
 15: ('303 Main',
  'Meter Export Energy (Solar) \n(channel 2)',
  'Detail of Bill',
  'Page 3',
  'total [kWh]\n(15)'),
 3: ('303 Main',
  'Meter Import Energy\n(channel 1)',
  'Detail of Bill',
  'Page 5',
  'off peak [kWh]\n(3)'),
 4: ('303 Main',
  'Meter Import Energy\n(channel 1)',
  'Detail of Bill',
  'Page 5',
  'peak [kWh]\n(4)'),
 10: ('303 Main',
  'Meter Import Energy\n(channel 1)',
  'Detail of Bill',
  'Page 3',
  'total [kWh]\n(10)'),
 5: ('303 Main',
  'Allocated Export Credits',
  'Detail of Bill',
  'Page 5',
  'off peak [kWh]\n(5)'),
 6: ('303 Main',
  'Allocated Export Credits',
  'Detail of Bill',
  'Page 5',
  'peak [kWh]\n(6)')

In [149]:
# Service End Date column for all rows. In the sheet shown above, only the
# first row of each bill carries a date; its sub-rows (e.g. "winter" /
# "summer") have NaT.
service_end_dates = df_excel_billing[('Dates', 'Dates', 'Bill / Detail of Bill', 'Generation Charges / Page 1', 'Service End Date [Date]')]

# Group row indices by month, including the base service end date row in the group.
# Months are assigned by position: the first dated row is months[0], the next
# dated row is months[1], and so on.
month_groups = {month: [] for month in months}

remaining_months = iter(months)
current_month = None  # None until the first dated row, and again once months run out
for i, date in enumerate(service_end_dates):
    if not pd.isna(date):
        # A dated row starts the next month. When there are more dated rows
        # than months, current_month becomes None so that neither this row nor
        # its undated sub-rows are attributed to the last known month.
        current_month = next(remaining_months, None)
    if current_month is not None:
        # Dated base row, or an undated sub-row belonging to the current month
        month_groups[current_month].append(i)

# print the excel data for month_groups["May"]
may_rows = month_groups["May"]
# Number code 4 is the main meter's peak import energy (see headers_by_number_code)
peak_import_column = headers_by_number_code[4]
for row in may_rows:
    extracted_values = df_excel_billing.iloc[row][peak_import_column]
    print(extracted_values)

88.0


382.0
