In [106]:
import os

import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Download the OEB historical electricity rates page and parse it.
# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() fails loudly instead of silently parsing an error page.
response = requests.get(
    "https://www.oeb.ca/consumer-information-and-protection/electricity-rates/historical-electricity-rates",
    timeout=30,
)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

In [107]:
# Keep a raw snapshot of the downloaded page next to the other raw data.
# NOTE(review): the file name is spelled "historcial"; it is left unchanged
# here because other code may read the snapshot by this exact path.
filepath = os.path.abspath(os.path.join("..", "data", "raw", "oeb.ca", "historcial-electricity-rates.html"))
# Create the target folder on a fresh checkout instead of failing on open().
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Write the bytes exactly as received: decoding and re-encoding through a
# text-mode handle would depend on the platform's default encoding and
# newline translation.
with open(filepath, "wb") as f:
    f.write(response.content)

In [125]:
# For every <h2> that has more than one child node, keep its second child
# (index 1); on this page that node carries the rate-type title.
rate_types = [
    children[1]
    for children in (heading.contents for heading in soup.find_all("h2"))
    if len(children) > 1
]
rate_types


['Time-of-Use (TOU) rates', 'Ultra-Low Overnight (ULO)', 'Tiered rates']

In [126]:
# Collect every <table> element on the page, then show how many were found.
tables = soup.find_all("table")
len(tables)

3

In [242]:
# Exploratory parse of the third table (index 2): header texts come from the
# <th> cells of the first <tr>; every later <tr> becomes a list of its <td>
# texts with newline characters removed.
table = tables[2]
rows = table.find_all("tr")
headers = [cell.get_text() for cell in rows[0].find_all("th")]
rows = [
    ["".join(cell.get_text().split("\n")) for cell in tr.find_all("td")]
    for tr in rows[1:]
]
rows

[['Nov 1, 2022', '8.7', '600 (Summer)\t\t\t1,000 (Winter)', '10.3'],
 ['Feb 8, 2022', '9.8', '1,000', '11.5'],
 ['Jan 18, 2022', '8.2', '1,000', '8.2'],
 ['Nov 1, 2021', '9.8', '1,000', '11.5'],
 ['May 1, 2021', '9.8', '600', '11.5'],
 ['Feb 23, 2021', '10.1', '1,000', '11.8'],
 ['Jan 1, 2021', '8.5', '1,000', '8.5'],
 ['Nov 1, 2020', '12.6', '1,000', '14.6'],
 ['May 1, 2020', '11.9', '1,000', '13.9'],
 ['Nov 1, 2019', '11.9', '1,000', '13.9'],
 ['May 1, 2019', '7.7', '600', '8.9'],
 ['May 1, 2018', '7.7', '600 (Summer)\t\t\t1,000 (Winter)', '8.9'],
 ['Jul 1, 2017', '7.7', '600 (Summer)\t\t\t1,000 (Winter)', '9.0'],
 ['May 1, 2017', '9.1', '600', '10.6'],
 ['Nov 1, 2016', '10.3', '1,000', '12.1'],
 ['May 1, 2016', '10.3', '600', '12.1'],
 ['Nov 1, 2015', '9.9', '1,000', '11.6'],
 ['May 1, 2015', '9.4', '600', '11.0'],
 ['Nov 1, 2014', '8.8', '1,000', '10.3'],
 ['May 1, 2014', '8.6', '600', '10.1'],
 ['Nov 1, 2013', '8.3', '1,000', '9.7'],
 ['May 1, 2013', '7.8', '600', '9.1'],
 ['Nov 1

In [268]:
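# Rate tables handled in this cell, in the order they are indexed in `tables`:
#   tables[0] -> Time-of-Use (TOU) rates
#   tables[1] -> Ultra-Low Overnight (ULO)
#   tables[2] -> Tiered rates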

def _clean_cell(text):
    """Normalise the text of one <td> cell; return None when nothing is left.

    Newlines and "*" footnote markers are removed, non-breaking spaces become
    regular spaces, and the " ¢ per kWh" unit suffix is stripped. The
    non-breaking-space replacement runs before the unit is stripped so that a
    suffix written with a non-breaking space is removed as well.
    """
    cleaned = (
        text.replace("\n", "")
        .replace("*", "")
        .replace("\xa0", " ")
        .replace(" ¢ per kWh", "")
        .strip()
    )
    # A blank cell is reported as missing rather than as an empty string,
    # which could not be converted to a number later on.
    return cleaned or None


def _to_float(value):
    """Parse a cleaned cell such as "1,000" into a float; missing stays NaN."""
    if pd.isna(value):
        return float("nan")
    return float(str(value).replace(",", ""))


def convert_table_to_df(table):
    """Convert one HTML rate table into a numeric DataFrame.

    Parameters
    ----------
    table
        A parsed table element exposing ``find_all`` (as a BeautifulSoup tag
        does). Its first ``<tr>`` supplies the column headers from its
        ``<th>`` cells; every following ``<tr>`` supplies one row from its
        ``<td>`` cells. One header must be "Effective date".

    Returns
    -------
    pandas.DataFrame
        Indexed by the parsed "Effective date", with every remaining column
        converted to float. Blank or absent cells become NaN.

        When the table has a "Residential threshold for lower tier price
        (kWh per month)" column, it is replaced by two columns suffixed
        " [Summer]" and " [Winter]". Cells of the form
        "600 (Summer) 1,000 (Winter)" are split between them (any whitespace
        between the parts is accepted); other cells are copied to both.
        In that case a missing higher tier price is also filled with the
        lower tier price of the same row.

    Raises
    ------
    ValueError
        If a non-blank cell cannot be parsed as a number after cleaning.
    """
    threshold_col = "Residential threshold for lower tier price (kWh per month)"
    summer_col = threshold_col + " [Summer]"
    winter_col = threshold_col + " [Winter]"
    lower_col = "Lower tier price (¢ per kWh)"
    higher_col = "Higher tier price (¢ per kWh)"

    trs = table.find_all(name="tr")
    headers = [th.text.replace("\xa0", " ") for th in trs[0].find_all("th")]
    rows = [[_clean_cell(td.text) for td in tr.find_all(name="td")] for tr in trs[1:]]

    df = pd.DataFrame(rows, columns=headers).set_index("Effective date")
    df.index = pd.to_datetime(df.index)

    if threshold_col in df.columns:
        # \s* instead of a fixed run of tabs: the separator between the two
        # seasonal values is layout whitespace that is not guaranteed.
        pat = r"(?P<summer>\S+)\s*\(Summer\)\s*(?P<winter>\S+)\s*\(Winter\)"
        seasonal = df[threshold_col].str.extract(pat)
        # Rows without a seasonal split use the single threshold for both.
        df[summer_col] = seasonal["summer"].fillna(df[threshold_col])
        df[winter_col] = seasonal["winter"].fillna(df[threshold_col])
        df = df.drop(columns=[threshold_col])
        # A missing higher tier price falls back to the lower tier price.
        df[higher_col] = df[higher_col].fillna(df[lower_col])

    for col in df.columns:
        df[col] = df[col].map(_to_float).astype(float)

    return df

# Convert the first three tables, in page order, into the TOU, ULO and tiered
# frames, then display the tiered one.
df_tou, df_ulo, df_tiered = (
    convert_table_to_df(tables[position]) for position in range(3)
)

df_tiered

Unnamed: 0_level_0,Lower tier price (¢ per kWh),Higher tier price (¢ per kWh),Residential threshold for lower tier price (kWh per month) [Summer],Residential threshold for lower tier price (kWh per month) [Winter]
Effective date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-11-01,8.7,10.3,600.0,1000.0
2022-02-08,9.8,11.5,1000.0,1000.0
2022-01-18,8.2,8.2,1000.0,1000.0
2021-11-01,9.8,11.5,1000.0,1000.0
2021-05-01,9.8,11.5,600.0,600.0
2021-02-23,10.1,11.8,1000.0,1000.0
2021-01-01,8.5,8.5,1000.0,1000.0
2020-11-01,12.6,14.6,1000.0,1000.0
2020-05-01,11.9,13.9,1000.0,1000.0
2019-11-01,11.9,13.9,1000.0,1000.0
