# Validate EIA-930 data against net generation outputs

In [110]:
import pandas as pd
import numpy as np
import os

import plotly.express as px
from datetime import datetime
from datetime import timedelta

In [2]:
# Data year to validate; interpolated into the results paths below
year = 2020

In [3]:
# EIA-930 data after timestamp adjustments but no cleaning.
# The path follows `year` so it stays in step with the results paths used below.
raw = pd.read_csv(f"../data/outputs/{year}/eia930/eia930_raw.csv", index_col=0, parse_dates=True)

In [4]:
# Template for the column in `raw` that is compared against net generation for a BA
GEN_ID = "EBA.{}-ALL.NG.H"
path = f"../data/results/{year}/power_sector_data/hourly/us_units/"

# Per-BA metrics, keyed by BA code
cors = {}  # correlation between net_generation_mwh and the 930 column
percent_difs = {}  # median of (930 - net generation) / net generation
annual_gen = {}  # sum of net_generation_mwh over the merged hours

for ba_f in os.listdir(path):
    ba = ba_f.replace(".csv", "")
    print(ba, end="...")
    col_name = GEN_ID.format(ba)
    # Nothing to compare against if the raw 930 file has no column for this BA
    if col_name not in raw.columns:
        continue

    dat = pd.read_csv(os.path.join(path, ba_f), parse_dates=["datetime_utc"])
    # Keep only the all-fuel total rows
    dat = dat[dat.fuel_category == "total"]
    # Inner merge: only hours present in both sources are compared
    dat = dat.merge(raw[col_name], left_on="datetime_utc", right_index=True)

    cors[ba] = dat[["net_generation_mwh", col_name]].corr().to_numpy()[0, 1]

    difs = (dat[col_name] - dat["net_generation_mwh"]) / dat["net_generation_mwh"]
    # Division by zero net generation yields +inf or -inf depending on the sign
    # of the numerator; drop both so neither can skew the median.
    difs = difs.replace([np.inf, -np.inf], np.nan)
    percent_difs[ba] = difs.median()

    annual_gen[ba] = dat["net_generation_mwh"].sum()

OHMS...FMPP...ERCO...TPWR...AMPL...SOCO...IPCO...PJM...WWA...EEI...AZPS...TEC...DOPD...YAD...IID...HGMA...DEAA...CPLW...SPA...GVL...ORMS...FPL...TAL...SEC...JEA...GCPD...TVA...TIDC...HECO...SCEG...INMS...CPLE...SEPA...AKMS...CHPD...PNM...WAUW...WACM...WALC...NWMT...PACE...AVA...SC...NSB...GWA...HIMS...LGEE...TEPC...ISNE...SRP...GRIS...HST...LDWP...RIMS...FPC...PACW...GRIF...PSEI...AECI...CSTO...DUK...AVRN...CEA...MISO...AEC...PSCO...OVEC...SWPP...BANC...NYIS...EPE...NBSO...SCL...BPAT...NEVP...CISO...PGE...

In [5]:
# Assemble the per-BA metrics into one table (one row per BA), largest BAs first
metric_columns = {
    "Difference as percent of hourly-egrid": percent_difs,
    "Correlation": cors,
    "Annual BA generation": annual_gen,
}
out = pd.DataFrame(data=metric_columns).sort_values(by="Annual BA generation", ascending=False)
out.to_csv(f"../data/results/{year}/validation_metrics/us_units/compare_930_hourlyegrid.csv")

# Visualize BA of interest

In [7]:
# Plot one BA's total net generation next to its EIA-930 column
ba = "SWPP"
col_name = GEN_ID.format(ba)
hourly = pd.read_csv(f"{path}{ba}.csv", parse_dates=["datetime_utc"])
is_total = hourly["fuel_category"] == "total"
# Inner merge on timestamp so both series share the same hours
dat = hourly.loc[is_total].merge(raw[col_name], left_on="datetime_utc", right_index=True)

px.line(dat, x="datetime_utc", y=["net_generation_mwh", col_name])

# Validate against carbon intensity and emissions data

* Scrape data from Singularity if not already downloaded
* Compare

Data location: 

`data/downloads/singularity/\<year\>/\<ba\>_rate.csv`

One file per BA; each file holds both the generated and consumed rate columns.

To scrape data, the environment variable `SINGULARITY_API_KEY` must be set.

In [1]:
import requests

In [115]:
# Data year and BA code for the carbon-intensity section
year = 2020
ba = "ISNE"


In [131]:
def download_singularity_carbon_intensities(ba, year):
    """Download carbon-intensity rates for one BA and year from the Singularity API.

    The result is cached at ``../data/downloads/singularity/{year}/{ba}_rate.csv``.
    If that file already exists, nothing is requested and a message is printed.

    Only records that start on the hour and whose emissions source contains
    "2019" are kept. Each kept rate goes into a column named after its
    accounting type ("consumed" / "generated"), with a suffix derived from the
    source label: ``_eq`` (CO2 equivalent), ``_u`` (unadjusted), or no suffix.
    Rows are indexed by the record's start date.

    Args:
        ba: BA code, e.g. "ISNE". Sent to the API as ``"EIA." + ba``.
        year: Calendar year to download, as an int.

    Returns:
        None. The data is written to the CSV file described above.

    Raises:
        KeyError: If the SINGULARITY_API_KEY environment variable is not set,
            or a response lacks an expected field.
        requests.HTTPError: If the API answers with an error status.
    """
    os.makedirs(f"../data/downloads/singularity/{year}", exist_ok=True)
    fpath = f"../data/downloads/singularity/{year}/{ba}_rate.csv"
    if os.path.exists(fpath):
        print(f"File {fpath} exists")
        return

    url = "https://api.singularity.energy/v1/region_events/search"
    headers = {
        'X-Api-Key': os.environ['SINGULARITY_API_KEY'],
    }
    params = {
        'event_type': "carbon_intensity",
        'region': "EIA." + ba,
    }

    # start_date -> {column name -> rate}
    dat = {}

    # Iterate through the year in one-week windows.
    # NOTE(review): the last window starts inside `year` but ends in the
    # following year, so a few days of year+1 can be included -- confirm
    # whether that overshoot is intended.
    start = datetime(year=year, month=1, day=1, hour=0, minute=0)
    while start.year == year:
        end = start + timedelta(weeks=1)
        # Format params for search
        params["start"] = start.strftime("%Y-%m-%dT%H:%MZ")
        params["end"] = end.strftime("%Y-%m-%dT%H:%MZ")
        # Show some progress
        print(start.strftime("%Y-%m-%d"), end="...")
        # Advance to the next window
        start = end

        page = 1
        last = 1

        # Get all pages of data for this week
        while page <= last:
            params['page'] = page
            response = requests.request(
                "GET", url, headers=headers, params=params, timeout=120
            )
            # Fail with the HTTP error itself rather than a KeyError further down
            response.raise_for_status()
            payload = response.json()  # parse the body once per page

            pagination = payload['meta']['pagination']
            next_page = pagination.get('next')
            # Fall back to simple increment when the API gives no usable next page
            page = next_page if next_page is not None else page + 1
            last = pagination['last']

            for d in payload['data']:
                # Only bother if this is real data, not infilled five minute intervals
                # (the slice is the minutes field of the ISO timestamp)
                if d['start_date'][-11:-9] != "00":
                    continue
                # Only bother if eGRID source year is 2019: some data is computed using 2018 factors
                source = d['meta']['generated_emissions_source']
                if "2019" not in source:
                    continue

                # Parse dat to consistent names based on source
                this_dat = dat.get(d['start_date'], {})
                try:
                    for accounting_type in ("consumed", "generated"):
                        rate_key = f"{accounting_type}_rate"
                        if rate_key not in d["data"]:
                            continue
                        if "eq" in source:  # CO2 equivalent
                            column = f"{accounting_type}_eq"
                        elif "u" in source:  # unadjusted
                            column = f"{accounting_type}_u"
                        else:  # normal adjustments
                            column = f"{accounting_type}"
                        this_dat[column] = d["data"][rate_key]
                    # Index by start date
                    dat[d['start_date']] = this_dat
                except Exception:
                    # Show the offending record, then re-raise the original error
                    print(f"Failed on \n {d} \n\n")
                    raise

    ba_dat = pd.DataFrame(dat).transpose()
    ba_dat.index = pd.to_datetime(ba_dat.index)
    ba_dat.to_csv(fpath)




In [130]:
# Download ISNE rates for 2020 (skipped if the cached CSV already exists)
download_singularity_carbon_intensities("ISNE", 2020)

2020-01-01...2020-01-08...2020-01-15...2020-01-22...2020-01-29...2020-02-05...2020-02-12...2020-02-19...2020-02-26...2020-03-04...2020-03-11...2020-03-18...2020-03-25...2020-04-01...2020-04-08...2020-04-15...2020-04-22...2020-04-29...2020-05-06...2020-05-13...2020-05-20...2020-05-27...2020-06-03...2020-06-10...2020-06-17...2020-06-24...2020-07-01...2020-07-08...2020-07-15...2020-07-22...2020-07-29...2020-08-05...2020-08-12...2020-08-19...2020-08-26...2020-09-02...2020-09-09...2020-09-16...2020-09-23...2020-09-30...2020-10-07...2020-10-14...2020-10-21...2020-10-28...2020-11-04...2020-11-11...2020-11-18...2020-11-25...2020-12-02...2020-12-09...2020-12-16...2020-12-23...2020-12-30...

In [132]:
# Download rates for every BA that has an hourly results file
results_dir = f"../data/results/{year}/power_sector_data/hourly/us_units/"
for ba_f in os.listdir(results_dir):
    # Skip stray non-CSV entries so they are not sent to the API as BA codes
    if not ba_f.endswith(".csv"):
        continue
    ba = ba_f.replace(".csv", "")
    print(ba)
    download_singularity_carbon_intensities(ba, year)

OHMS
2020-01-01...2020-01-08...2020-01-15...2020-01-22...2020-01-29...2020-02-05...2020-02-12...2020-02-19...2020-02-26...2020-03-04...2020-03-11...2020-03-18...2020-03-25...2020-04-01...2020-04-08...2020-04-15...2020-04-22...2020-04-29...2020-05-06...2020-05-13...2020-05-20...2020-05-27...2020-06-03...2020-06-10...2020-06-17...2020-06-24...2020-07-01...2020-07-08...2020-07-15...2020-07-22...2020-07-29...2020-08-05...2020-08-12...2020-08-19...2020-08-26...2020-09-02...2020-09-09...2020-09-16...2020-09-23...2020-09-30...2020-10-07...2020-10-14...2020-10-21...2020-10-28...2020-11-04...2020-11-11...2020-11-18...2020-11-25...2020-12-02...2020-12-09...2020-12-16...2020-12-23...2020-12-30...FMPP
2020-01-01...2020-01-08...2020-01-15...2020-01-22...2020-01-29...2020-02-05...2020-02-12...2020-02-19...2020-02-26...2020-03-04...2020-03-11...2020-03-18...2020-03-25...2020-04-01...2020-04-08...2020-04-15...2020-04-22...2020-04-29...2020-05-06...2020-05-13...2020-05-20...2020-05-27...2020-06-03...20

In [104]:
# `ba_dat` is a local inside download_singularity_carbon_intensities, so reload
# the cached CSV that the function wrote before plotting.
plot_ba = "ISNE"
ba_dat = pd.read_csv(
    f"../data/downloads/singularity/{year}/{plot_ba}_rate.csv",
    index_col=0,
    parse_dates=True,
)
px.line(ba_dat, x=ba_dat.index, y=["consumed_u","generated_u","consumed","generated", "generated_eq"])

In [90]:
# Debug cell: last four characters of one record's emissions-source label.
# NOTE(review): in the cells shown here `response` is only assigned inside
# download_singularity_carbon_intensities, so this relies on earlier session state.
response.json()['data'][-3]['meta']['generated_emissions_source'][-4:]

'2019'

In [87]:
# Debug cell: inspect one raw record from the last response.
# NOTE(review): `response` is not defined at notebook scope in the cells shown here.
response.json()['data'][-4]

{'data': {'generated_rate': 547.3641644272594},
 'dedup_key': 'EIA.ISNE:carbon_intensity:EGRID_2019_eq:2020-01-07T23:55:00+00:00',
 'event_type': 'carbon_intensity',
 'meta': {'generated_emissions_source': 'EGRID_2019_eq',
  'inserted_at': '2021-06-04T10:24:20.884818Z',
  'raw_start_date': '2020-01-08T00:00:00+00:00',
  'source': 'generated_fuel_mix:EIA.ISNE:2020-01-07T23:55:00+00:00',
  'unit': 'lbs/MWh'},
 'region': 'EIA.ISNE',
 'start_date': '2020-01-07T23:55:00+00:00'}

In [89]:
# Debug cell: inspect another raw record from the last response.
# NOTE(review): `response` is not defined at notebook scope in the cells shown here.
response.json()['data'][-2]

{'data': {'generated_rate': 589.5618602768313},
 'dedup_key': 'EIA.ISNE:carbon_intensity:EGRID_2018:2020-01-07T23:55:00+00:00',
 'event_type': 'carbon_intensity',
 'meta': {'generated_emissions_source': 'EGRID_2018',
  'inserted_at': '2020-06-06T22:26:23.466414Z',
  'raw_start_date': '2020-01-08T00:00:00+00:00',
  'source': 'generated_fuel_mix:EIA.ISNE:2020-01-07T23:55:00+00:00',
  'unit': 'lbs/MWh'},
 'region': 'EIA.ISNE',
 'start_date': '2020-01-07T23:55:00+00:00'}