# PGE Budget Billing vs. Traditional

PGE has a new program, "Budget Billing":

"The Budget Billing program averages your energy costs over the previous 12 months to determine your monthly payment amount. If your actual energy costs significantly change, we adjust your monthly Budget Billing payment amount once every four months."

Questions for PGE:
+ What counts as "significant" change?

Questions for analysis:
+ Historically, would we have benefitted from this?
+ If so, what is the optimal month to start (given the 4-month update cycle)?

In [24]:
# starting after 1 full year, compute rolling 12-month average and compare it x4 to the sum of the next 4 months

# double check I pulled the full PGE data (I really want to start at December 2020)

In [2]:
import pandas as pd
import numpy as np
import datetime

In [3]:
# make this csv parsing into a function

gas = pd.read_csv('data/pge_gas_billing_data_4321483928_2021-04-02_to_2024-03-01.csv',
                    header=4, parse_dates=['START DATE', 'END DATE'])
gas['MID-DATE'] = gas['START DATE'] + datetime.timedelta(days=5) # to accomodate start dates at the end of the month
gas['MONTH'] = gas['MID-DATE'].dt.to_period('M')
electric = pd.read_csv('data/pge_electric_billing_data_4323839271_2021-04-01_to_2024-02-29.csv',
                    header=4, parse_dates=['START DATE', 'END DATE'])
electric['MID-DATE'] = electric['START DATE'] + datetime.timedelta(days=5)
electric['MONTH'] = electric['MID-DATE'].dt.to_period('M')
total = pd.merge(left=gas, right=electric, left_on='MONTH', right_on='MONTH',
                suffixes=['_gas', '_electric'])
total['TOTAL COST'] = total['COST_gas'].str.slice(start=1).astype(float) + total['COST_electric'].str.slice(start=1).astype(float)
total.sort_values(by='MONTH', ascending=True)
total['1Y_ROLLING_AVG'] = total.rolling(12).mean().round(2)['TOTAL COST']

In [4]:
total

Unnamed: 0,TYPE_gas,START DATE_gas,END DATE_gas,USAGE_gas,UNITS_gas,COST_gas,NOTES_gas,MID-DATE_gas,MONTH,TYPE_electric,START DATE_electric,END DATE_electric,USAGE_electric,UNITS_electric,COST_electric,NOTES_electric,MID-DATE_electric,TOTAL COST,1Y_ROLLING_AVG
0,Natural gas billing,2021-04-02,2021-05-03,25.0,therms,$43.12,,2021-04-07,2021-04,Electric billing,2021-04-01,2021-05-02,138.74,kWh,$31.79,,2021-04-06,74.91,
1,Natural gas billing,2021-05-04,2021-06-02,16.0,therms,$25.38,,2021-05-09,2021-05,Electric billing,2021-05-03,2021-06-01,125.33,kWh,$28.81,,2021-05-08,54.19,
2,Natural gas billing,2021-06-03,2021-07-01,8.0,therms,$12.73,,2021-06-08,2021-06,Electric billing,2021-06-02,2021-06-30,96.14,kWh,$24.68,,2021-06-07,37.41,
3,Natural gas billing,2021-07-02,2021-08-02,12.0,therms,$18.81,,2021-07-07,2021-07,Electric billing,2021-07-01,2021-08-01,112.2,kWh,$28.77,,2021-07-06,47.58,
4,Natural gas billing,2021-08-03,2021-09-01,13.0,therms,$20.94,,2021-08-08,2021-08,Electric billing,2021-08-02,2021-08-31,119.09,kWh,$30.81,,2021-08-07,51.75,
5,Natural gas billing,2021-09-02,2021-10-01,13.0,therms,$24.01,,2021-09-07,2021-09,Electric billing,2021-09-01,2021-09-30,126.13,kWh,$32.59,,2021-09-06,56.6,
6,Natural gas billing,2021-10-02,2021-11-01,17.0,therms,$35.65,,2021-10-07,2021-10,Electric billing,2021-10-01,2021-10-31,121.12,kWh,$27.97,,2021-10-06,63.62,
7,Natural gas billing,2021-11-02,2021-12-02,21.0,therms,$46.93,,2021-11-07,2021-11,Electric billing,2021-11-01,2021-12-01,109.94,kWh,$25.38,,2021-11-06,72.31,
8,Natural gas billing,2021-12-03,2021-12-30,38.0,therms,$85.41,,2021-12-08,2021-12,Electric billing,2021-12-02,2021-12-29,111.59,kWh,$25.89,,2021-12-07,111.3,
9,Natural gas billing,2021-12-31,2022-01-31,41.0,therms,$96.63,,2022-01-05,2022-01,Electric billing,2021-12-30,2022-01-30,130.0,kWh,$33.16,,2022-01-04,129.79,


In [5]:
row_index = np.isnan(total['1Y_ROLLING_AVG']).argmin(axis=0) # first non-null rolling average
row_index

# start from here to calculate what month is optimal to start with

11

In [6]:
# total.iloc[11]['MONTH'] is really Period('2022-03', 'M')
string_of_period = str(total.iloc[11]['MONTH'])
datetime.datetime.strptime(string_of_period, '%Y-%M') # this is not == total.iloc[11]['MONTH']

# figure out how a user can enter the date '2022-03' as the start and we can find that in the df
# idea: now convert df 'MONTH' column to strings.....

datetime.datetime(2022, 1, 1, 0, 3)

In [7]:
def cost_comparison_quarterly_one_year(row_index):
    df = pd.DataFrame(columns=['start_month', 'end_month', 'cheaper_plan', 'cheaper_by_$'])
    start_row = row_index
    for i in range(4):
        start_month = total.iloc[start_row]['MONTH']
        end_month = total.iloc[start_row+3]['MONTH']
        budget_cost = total.iloc[start_row]['1Y_ROLLING_AVG']*4
        actual_cost = 0
        for i in range(4):
            actual_cost += total.iloc[start_row+i]['TOTAL COST']
        cost_difference = actual_cost - budget_cost
        if cost_difference > 0:
            #print(f'For the months {start_month} to {end_month}, \n\
            #budget cost is cheaper by ${cost_difference:.2f}.')
            cheaper_plan = 'budget cost'
        if cost_difference < 0:
            #print(f'For the months {start_month} to {end_month}, \n\
            #the actual cost is cheaper by ${-cost_difference:.2f}.')
            cheaper_plan = 'actual cost'
            cost_difference *= -1
        if cost_difference == 0:
            #print(f'For the months {start_month} to {end_month}, \n\
            #there is no cost difference.')
            cheaper_plan = 'no difference'
        df.loc[len(df.index)] = [start_month, end_month, cheaper_plan, cost_difference]
        start_row += 3
    return df

In [8]:
cost_comparison_quarterly_one_year(11)

Unnamed: 0,start_month,end_month,cheaper_plan,cheaper_by_$
0,2022-03,2022-06,actual cost,37.68
1,2022-06,2022-09,actual cost,108.15
2,2022-09,2022-12,actual cost,39.87
3,2022-12,2023-03,budget cost,164.32


In [9]:
def cost_comparison_yearly(quarterly_df):
    df = pd.DataFrame(columns=['start_month', 'end_month', 'cheaper_plan', 'cheaper_by_$'])
    start_month = quarterly_df['start_month'].min()
    end_month = quarterly_df['end_month'].max()
    quarterly_df['cheaper_by_$'].where(quarterly_df['cheaper_plan']=='actual cost', -quarterly_df['cheaper_by_$'], inplace=True)
    cost_difference = quarterly_df['cheaper_by_$'].sum()
    if cost_difference > 0:
        cheaper_plan = 'actual cost'
    if cost_difference < 0:
        cheaper_plan = 'budget cost'
        cost_difference *= -1
    if cost_difference == 0:
        cheaper_plan = 'no difference'
    df.loc[len(df.index)] = [start_month, end_month, cheaper_plan, cost_difference]
    return df

In [10]:
cost_comparison_yearly(cost_comparison_quarterly_one_year(11))

Unnamed: 0,start_month,end_month,cheaper_plan,cheaper_by_$
0,2022-03,2023-03,actual cost,21.38


In [11]:
start_month = np.isnan(total['1Y_ROLLING_AVG']).argmin(axis=0) # first non-null rolling average
start_month

11

In [23]:
analysis_df = pd.DataFrame(columns=['start_month', 'end_month', 'cheaper_plan', 'cheaper_by_$'])
for i in range(start_month, len(total.index)-12):
    analysis_df = analysis_df.append(cost_comparison_yearly(cost_comparison_quarterly_one_year(i)), ignore_index=True)
analysis_df

Unnamed: 0,start_month,end_month,cheaper_plan,cheaper_by_$
0,2022-03,2023-03,actual cost,21.38
1,2022-04,2023-04,budget cost,7.59
2,2022-05,2023-05,budget cost,13.83
3,2022-06,2023-06,actual cost,27.53
4,2022-07,2023-07,actual cost,6.34
5,2022-08,2023-08,budget cost,13.29
6,2022-09,2023-09,actual cost,17.86
7,2022-10,2023-10,budget cost,25.32
8,2022-11,2023-11,budget cost,17.62
9,2022-12,2023-12,actual cost,18.34


In [None]:
# plots
from matplotlib import pyplot as plt

In [None]:
plt.plot(total.index, total['TOTAL COST'])

In [None]:
total.dtypes

In [None]:
fig, ax = plt.subplots(figsize=(16,8))
ax.bar(x=total['MONTH'].dt.to_timestamp(), height=total['TOTAL COST'], width=25)
ax.xaxis_date()
ax.tick_params(axis='x', rotation=60)
ax.set_xticks(total['MONTH'].dt.to_timestamp())
ax
# color by year