[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ryanfobel/gridwatch-history/main)

# Calculate CO2e intensity for the Ontario grid

Try to calculate the grid intensity from publically available data to see if we can match the data provided by gridwatch. See [issue #1](https://github.com/ryanfobel/gridwatch-history/issues/1) on github.

In [97]:
%load_ext autoreload
%autoreload 2

import os
import sys

sys.path.insert(0, os.path.join(".."))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams

from gridwatch_scraper import load_file

%matplotlib inline

rcParams.update({"figure.figsize": (12, 6)})

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [98]:
# Download IESO historical data
import requests

def download_url(url, ext='.xlsx'):
    filename = os.path.join('..', 'data', 'raw', 'IESO', os.path.splitext(url.split('/')[-1])[0] + ext)
    if not os.path.exists(filename):
        print(f"Download { filename }")
        r = requests.get(url)
        if r.ok:
            with open(filename,'wb') as output_file:
                output_file.write(r.content)
        else:
            print("Error downloading file")

for year in range(2010, 2019):
    url = f"https://ieso.ca/-/media/Files/IESO/Power-Data/data-directory/GOC-{ year }.ashx"
    download_url(url)
    
# Special case: 2019 Jan-Apr
download_url('https://ieso.ca/-/media/Files/IESO/Power-Data/data-directory/GOC-2019-Jan-April.ashx')    

# Rest of 2019
year = 2019
for month in range(6,13):
    url = "http://reports.ieso.ca/public/GenOutputCapabilityMonth/PUB_GenOutputCapabilityMonth_%d%02d.csv" % (year, month)
    download_url(url, ext='.csv')
    
# 2020 to present
for year in range(2020, 2024):
    print(year)
    for month in range(1,13):
        url = "http://reports.ieso.ca/public/GenOutputCapabilityMonth/PUB_GenOutputCapabilityMonth_%d%02d.csv" % (year, month)
        download_url(url, ext='.csv')

2020
2021
2022
2023
Download ..\data\raw\IESO\PUB_GenOutputCapabilityMonth_202305.csv
Error downloading file
Download ..\data\raw\IESO\PUB_GenOutputCapabilityMonth_202306.csv
Error downloading file
Download ..\data\raw\IESO\PUB_GenOutputCapabilityMonth_202307.csv
Error downloading file
Download ..\data\raw\IESO\PUB_GenOutputCapabilityMonth_202308.csv
Error downloading file
Download ..\data\raw\IESO\PUB_GenOutputCapabilityMonth_202309.csv
Error downloading file
Download ..\data\raw\IESO\PUB_GenOutputCapabilityMonth_202310.csv
Error downloading file
Download ..\data\raw\IESO\PUB_GenOutputCapabilityMonth_202311.csv
Error downloading file
Download ..\data\raw\IESO\PUB_GenOutputCapabilityMonth_202312.csv
Error downloading file


In [99]:
def cleanup(df, date_col="DATE", hour_col="HOUR"):
    df = df[pd.notna(df[date_col])]
    df[hour_col] = df[hour_col] - 1
    df.index = pd.to_datetime([f'{row[date_col].date().isoformat()} {int(row[hour_col]):02}:00:00' for index, row in df.iterrows()])
    return df.drop(columns=[date_col, hour_col])

for year in range(2010, 2019):
    output_path = os.path.join("..", "data", "clean", "IESO", f"{year}.csv")
    if not os.path.exists(output_path):
        print(year)
        url = f"https://ieso.ca/-/media/Files/IESO/Power-Data/data-directory/GOC-{ year }.ashx"
        filename = os.path.join('..', 'data', 'raw', 'IESO', os.path.splitext(url.split('/')[-1])[0] + ".xlsx")
        df = pd.read_excel(filename, engine='openpyxl')
        try:
            df = cleanup(df)
        except KeyError:
            df = cleanup(df, date_col="Date", hour_col="Hour")
        df.to_csv(output_path)

In [100]:

years = range(2010, 2020)
df = pd.DataFrame(
    {
        "url": [f"https://ieso.ca/-/media/Files/IESO/Power-Data/data-directory/GOC-{ year }.ashx" for year in years]
    },
    index=years
)
df.loc[2019, "url"] = 'https://ieso.ca/-/media/Files/IESO/Power-Data/data-directory/GOC-2019-Jan-April.ashx'
df["ext"] = ".xlsx"
df

Unnamed: 0,url,ext
2010,https://ieso.ca/-/media/Files/IESO/Power-Data/...,.xlsx
2011,https://ieso.ca/-/media/Files/IESO/Power-Data/...,.xlsx
2012,https://ieso.ca/-/media/Files/IESO/Power-Data/...,.xlsx
2013,https://ieso.ca/-/media/Files/IESO/Power-Data/...,.xlsx
2014,https://ieso.ca/-/media/Files/IESO/Power-Data/...,.xlsx
2015,https://ieso.ca/-/media/Files/IESO/Power-Data/...,.xlsx
2016,https://ieso.ca/-/media/Files/IESO/Power-Data/...,.xlsx
2017,https://ieso.ca/-/media/Files/IESO/Power-Data/...,.xlsx
2018,https://ieso.ca/-/media/Files/IESO/Power-Data/...,.xlsx
2019,https://ieso.ca/-/media/Files/IESO/Power-Data/...,.xlsx


In [101]:
# # Rest of 2019
# year = 2019
# for month in range(6,13):
#     url = "http://reports.ieso.ca/public/GenOutputCapabilityMonth/PUB_GenOutputCapabilityMonth_%d%02d.csv" % (year, month)
#     download_url(url, ext='.csv')

from glob import glob

df = pd.DataFrame({
    "filepath": glob(os.path.join("..", "data", "raw", "IESO", "PUB_GenOutputCapabilityMonth_*"))
})
df["filename"] = [os.path.basename(fn) for fn in df["filepath"]]
df = pd.concat([
    df,
    df["filename"].str.extract(r'PUB_GenOutputCapabilityMonth_(?P<year>\d{4})(?P<month>\d{2})')
], axis=1)

df

Unnamed: 0,filepath,filename,year,month
0,..\data\raw\IESO\PUB_GenOutputCapabilityMonth_...,PUB_GenOutputCapabilityMonth_201906.csv,2019,6
1,..\data\raw\IESO\PUB_GenOutputCapabilityMonth_...,PUB_GenOutputCapabilityMonth_201907.csv,2019,7
2,..\data\raw\IESO\PUB_GenOutputCapabilityMonth_...,PUB_GenOutputCapabilityMonth_201908.csv,2019,8
3,..\data\raw\IESO\PUB_GenOutputCapabilityMonth_...,PUB_GenOutputCapabilityMonth_201909.csv,2019,9
4,..\data\raw\IESO\PUB_GenOutputCapabilityMonth_...,PUB_GenOutputCapabilityMonth_201910.csv,2019,10
5,..\data\raw\IESO\PUB_GenOutputCapabilityMonth_...,PUB_GenOutputCapabilityMonth_201911.csv,2019,11
6,..\data\raw\IESO\PUB_GenOutputCapabilityMonth_...,PUB_GenOutputCapabilityMonth_201912.csv,2019,12
7,..\data\raw\IESO\PUB_GenOutputCapabilityMonth_...,PUB_GenOutputCapabilityMonth_202001.csv,2020,1
8,..\data\raw\IESO\PUB_GenOutputCapabilityMonth_...,PUB_GenOutputCapabilityMonth_202002.csv,2020,2
9,..\data\raw\IESO\PUB_GenOutputCapabilityMonth_...,PUB_GenOutputCapabilityMonth_202003.csv,2020,3


In [102]:
from CA import fetch_production
import arrow

now = arrow.now()

In [103]:
# 3-months of historical data available via IESO xml feed [1]
# 1. https://github.com/electricitymaps/electricitymaps-contrib/blob/master/parsers/CA_ON.py)
data = fetch_production(target_datetime=now.shift(months=-3).datetime)
print(len(data))
data

24


[{'datetime': datetime.datetime(2023, 1, 23, 1, 0, tzinfo=datetime.timezone(datetime.timedelta(-1, 68400), 'UTC-5')),
  'zoneKey': 'CA-ON',
  'production': {'biomass': 22.0,
   'gas': 664.0,
   'hydro': 4559.0,
   'nuclear': 9977.0,
   'solar': 0.0,
   'wind': 309.0},
  'storage': {},
  'source': 'ieso.ca'},
 {'datetime': datetime.datetime(2023, 1, 23, 2, 0, tzinfo=datetime.timezone(datetime.timedelta(-1, 68400), 'UTC-5')),
  'zoneKey': 'CA-ON',
  'production': {'biomass': 22.0,
   'gas': 407.0,
   'hydro': 4403.0,
   'nuclear': 9972.0,
   'solar': 0.0,
   'wind': 416.0},
  'storage': {},
  'source': 'ieso.ca'},
 {'datetime': datetime.datetime(2023, 1, 23, 3, 0, tzinfo=datetime.timezone(datetime.timedelta(-1, 68400), 'UTC-5')),
  'zoneKey': 'CA-ON',
  'production': {'biomass': 22.0,
   'gas': 619.0,
   'hydro': 4130.0,
   'nuclear': 9973.0,
   'solar': 0.0,
   'wind': 542.0},
  'storage': {},
  'source': 'ieso.ca'},
 {'datetime': datetime.datetime(2023, 1, 23, 4, 0, tzinfo=datetime.tim

In [108]:
df = pd.json_normalize(fetch_production(target_datetime=now))
print(now)
df

2023-04-23T15:47:25.111187-04:00


Unnamed: 0,datetime,zoneKey,source,production.biomass,production.gas,production.hydro,production.nuclear,production.solar,production.wind
0,2023-04-23 01:00:00-05:00,CA-ON,ieso.ca,0.0,151.0,4609.0,8709.0,0.0,1335.0
1,2023-04-23 02:00:00-05:00,CA-ON,ieso.ca,0.0,279.0,4321.0,8705.0,0.0,1387.0
2,2023-04-23 03:00:00-05:00,CA-ON,ieso.ca,0.0,380.0,4362.0,8705.0,0.0,1394.0
3,2023-04-23 04:00:00-05:00,CA-ON,ieso.ca,0.0,507.0,4491.0,8703.0,0.0,1353.0
4,2023-04-23 05:00:00-05:00,CA-ON,ieso.ca,0.0,506.0,4748.0,8705.0,0.0,1211.0
5,2023-04-23 06:00:00-05:00,CA-ON,ieso.ca,0.0,496.0,5259.0,8705.0,1.0,1062.0
6,2023-04-23 07:00:00-05:00,CA-ON,ieso.ca,0.0,502.0,5620.0,8704.0,26.0,1033.0
7,2023-04-23 08:00:00-05:00,CA-ON,ieso.ca,0.0,521.0,5982.0,8704.0,80.0,829.0
8,2023-04-23 09:00:00-05:00,CA-ON,ieso.ca,0.0,517.0,5831.0,8701.0,160.0,833.0
9,2023-04-23 10:00:00-05:00,CA-ON,ieso.ca,0.0,501.0,5313.0,8702.0,223.0,844.0


![biomass](images/co2signal/biomass.png)
![coal](images/co2signal/coal.png)
![gas](images/co2signal/gas.png)
![geothermal](images/co2signal/geothermal.png)
![hydro](images/co2signal/hydro.png)
![nuclear](images/co2signal/nuclear.png)
![oil](images/co2signal/oil.png)
![solar](images/co2signal/solar.png)
![wind](images/co2signal/wind.png)

In [105]:
df_intensity = pd.DataFrame({
    "biomass": {
        "carbon_intensity": 230,
        "source": "IPCC 2014"
    },
    "coal": {
        "carbon_intensity": 820,
        "source": "IPCC 2014"
    },
    "gas": {
        "carbon_intensity": 490,
        "source": "IPCC 2014"
    },
    "geothermal": {
        "carbon_intensity": 38,
        "source": "IPCC 2014"
    },
    "hydro": {
        "carbon_intensity": 24,
        "source": "IPCC 2014"
    },
    "nuclear": {
        "carbon_intensity": 5,
        "source": "Mallia, E., Lewis, G. 'Life cycle greenhouse gas emissions of electricity generation in the province of Ontario, Canada' (2013)"
    },
    "oil": {
        "carbon_intensity": 650,
        "source": "UK POST 2014"
    },
    "solar": {
        "carbon_intensity": 45,
        "source": "IPCC 2014"
    },
    "wind": {
        "carbon_intensity": 11,
        "source": "IPCC 2014"
    },
})
df_intensity

Unnamed: 0,biomass,coal,gas,geothermal,hydro,nuclear,oil,solar,wind
carbon_intensity,230,820,490,38,24,5,650,45,11
source,IPCC 2014,IPCC 2014,IPCC 2014,IPCC 2014,IPCC 2014,"Mallia, E., Lewis, G. 'Life cycle greenhouse g...",UK POST 2014,IPCC 2014,IPCC 2014


In [106]:
sources = [col[len("production."):] for col in df.columns if col.startswith("production.")]

df["production.total"] = 0
for source in sources:
    df["production.total"] = df["production.total"] + df[f"production.{source}"]

df["emissions.total"] = 0
for source in sources:
    df[f"emissions.{source}"] = df[f"production.{source}"] * df_intensity[source].carbon_intensity
    df["emissions.total"] = df["emissions.total"] + df[f"emissions.{source}"]

In [107]:
df["carbon_intensity"] = df["emissions.total"] / df["production.total"] # kg/MWh or g/kWh

# production in MW, emissions in kg CO2e, carbon intensity (gCO2e/kWh)
df[["emissions.total", "production.total", "carbon_intensity"]]

Unnamed: 0,emissions.total,production.total,carbon_intensity
0,242836.0,14804.0,16.403404
1,299196.0,14692.0,20.364552
2,349747.0,14841.0,23.566269
3,414612.0,15054.0,27.54165
4,418738.0,15170.0,27.603032
5,424508.0,15523.0,27.347033
6,436913.0,15885.0,27.504753
7,455097.0,16116.0,28.238831
8,453142.0,16042.0,28.247226
9,435831.0,15583.0,27.968363
