Skip to content

Commit

Permalink
Merge pull request #251 from singularity-energy/v0.1.2
Browse files Browse the repository at this point in the history
v0.1.2
  • Loading branch information
gailin-p committed Oct 26, 2022
2 parents 5f13239 + a761999 commit bce51f7
Show file tree
Hide file tree
Showing 20 changed files with 2,121 additions and 385 deletions.
2 changes: 2 additions & 0 deletions data/manual/epa_eia_crosswalk_manual.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plant_id_epa,unitid,plant_id_eia,generator_id,notes
389,3,389,3,CAMD_UNIT_ID matches EIA_GENERATOR_ID
1363,7A,1363,7A,CAMD_UNIT_ID matches EIA_GENERATOR_ID
1363,7B,1363,7B,CAMD_UNIT_ID matches EIA_GENERATOR_ID
1393,1A,1393,1,CAMD_UNIT_ID matches boiler_id and reported fuel consumption matches
1393,2A,1393,2,CAMD_UNIT_ID matches boiler_id and reported fuel consumption matches
1393,3,1393,3,CAMD_GENERATOR_ID matches EIA_GENERATOR_ID
1570,11,1570,11,CAMD_UNIT_ID matches EIA_GENERATOR_ID
1570,9,1570,9,CAMD_UNIT_ID matches EIA_GENERATOR_ID
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ dependencies:

- pip:
#- --editable ../pudl #NOTE: this is for Greg's use for development
- git+https://github.com/grgmiller/pudl.git@allocate_fuel#egg=catalystcoop.pudl
- git+https://github.com/grgmiller/pudl.git@oge#egg=catalystcoop.pudl
# - --editable ../gridemissions # NOTE: for Gailin's development use
- git+https://github.com/gailin-p/gridemissions#egg=gridemissions
12 changes: 5 additions & 7 deletions notebooks/explore_data/explore_intermediate_outputs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,11 @@
"year = 2020\n",
"path_prefix = f\"{year}/\"\n",
"\n",
"cems = pd.read_csv(f'{outputs_folder()}{path_prefix}/cems_{year}.csv', dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
"partial_cems_scaled = pd.read_csv(f'{outputs_folder()}{path_prefix}/partial_cems_scaled_{year}.csv', dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
"eia923_allocated = pd.read_csv(f'{outputs_folder()}{path_prefix}/eia923_allocated_{year}.csv', dtype=get_dtypes(), parse_dates=['report_date'])\n",
"plant_attributes = pd.read_csv(f\"{outputs_folder()}{path_prefix}/plant_static_attributes_{year}.csv\")\n",
"primary_fuel_table = plant_attributes.drop_duplicates(subset=\"plant_id_eia\")[[\"plant_id_eia\", \"plant_primary_fuel\"]]\n",
"residual_profiles = pd.read_csv(f\"{outputs_folder()}{path_prefix}/residual_profiles_{year}.csv\")\n",
"shaped_eia_data = pd.read_csv(f\"{outputs_folder()}{path_prefix}/shaped_eia923_data_{year}.csv\")"
"cems = pd.read_csv(outputs_folder(f\"{path_prefix}/cems_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
"partial_cems_plant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_plant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
"partial_cems_subplant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_subplant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
"eia923_allocated = pd.read_csv(outputs_folder(f\"{path_prefix}/eia923_allocated_{year}.csv\"), dtype=get_dtypes(), parse_dates=['report_date'])\n",
"plant_attributes = pd.read_csv(outputs_folder(f\"{path_prefix}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes())"
]
},
{
Expand Down
257 changes: 257 additions & 0 deletions notebooks/explore_data/gens_not_in_cems.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import packages\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import plotly.express as px\n",
"\n",
"%reload_ext autoreload\n",
"%autoreload 2\n",
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
"sys.path.append('../../../open-grid-emissions/src/')\n",
"\n",
"import download_data\n",
"import load_data\n",
"from column_checks import get_dtypes\n",
"from filepaths import *\n",
"import impute_hourly_profiles\n",
"import data_cleaning\n",
"import output_data\n",
"import emissions\n",
"import validation\n",
"import gross_to_net_generation\n",
"import eia930\n",
"\n",
"year = 2020\n",
"path_prefix = f\"{year}/\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load inputs to function\n",
"cems = pd.read_csv(outputs_folder(f\"{path_prefix}/cems_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
"partial_cems_plant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_plant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
"partial_cems_subplant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_subplant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
"eia923_allocated = pd.read_csv(outputs_folder(f\"{path_prefix}/eia923_allocated_{year}.csv\"), dtype=get_dtypes(), parse_dates=['report_date'])\n",
"plant_attributes = pd.read_csv(outputs_folder(f\"{path_prefix}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes())\n",
"\n",
"# select eia only data \n",
"eia_only_data = eia923_allocated[\n",
" (eia923_allocated[\"hourly_data_source\"] == \"eia\")\n",
" & ~(eia923_allocated[\"fuel_consumed_mmbtu\"].isna())\n",
"].copy()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Why are NOx emissions from non-CEMS plants so high in CAISO?\n",
"According to the data source, about 70% of emitting generation is represented in CEMS, but only 4% of NOx emissions are. Do 30% of plants account for 96% of NOx emissions?\n",
"\n",
"Are we over-counting NOx from non-cems plants?\n",
"Is there a lot of missing nox data for CEMS plants?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# add ba codes and plant primary fuel to all of the data\n",
"eia_only_data = eia_only_data.merge(\n",
" plant_attributes[[\"plant_id_eia\", \"ba_code\", \"plant_primary_fuel\"]],\n",
" how=\"left\",\n",
" on=\"plant_id_eia\",\n",
" validate=\"m:1\",\n",
")\n",
"cems = cems.merge(\n",
" plant_attributes[[\"plant_id_eia\", \"ba_code\", \"plant_primary_fuel\"]],\n",
" how=\"left\",\n",
" on=\"plant_id_eia\",\n",
" validate=\"m:1\",\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cems_caiso = cems[cems[\"ba_code\"] == \"CISO\"].copy()\n",
"eia_caiso = eia_only_data[eia_only_data[\"ba_code\"] == \"CISO\"].copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cems_caiso[\"nox_mass_lb_for_electricity\"].sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eia_caiso[\"nox_mass_lb_for_electricity\"].sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eia_caiso[\"nox_rate\"] = eia_caiso[\"nox_mass_lb_for_electricity\"] / eia_caiso[\"net_generation_mwh\"]\n",
"eia_caiso[\"nox_rate\"] = eia_caiso[\"nox_rate\"].replace(np.inf, np.nan)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eia_caiso.groupby([\"prime_mover_code\",\"energy_source_code\",])[\"nox_mass_lb_for_electricity\"].sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eia_caiso.groupby([\"prime_mover_code\"])[\"nox_mass_lb_for_electricity\"].sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eia_caiso.groupby([\"energy_source_code\"])[\"nox_mass_lb_for_electricity\"].sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eia_caiso[\"nox_mass_lb_for_electricity\"].sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Investigate the capacity factor of plants in each dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"subplant_nameplate = gross_to_net_generation.calculate_subplant_nameplate_capacity(year)\n",
"\n",
"pudl_out = load_data.initialize_pudl_out(year)\n",
"gen_cap = pudl_out.gens_eia860()[[\"plant_id_eia\",\"generator_id\",\"capacity_mw\"]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eia_cf = eia_only_data.merge(gen_cap, how=\"left\", on=[\"plant_id_eia\",\"generator_id\"], validate=\"m:1\")\n",
"eia_cf[\"capfac\"] = eia_cf.net_generation_mwh / (eia_cf.report_date.dt.days_in_month * 24 * eia_cf.capacity_mw)\n",
"eia_cf.loc[eia_cf[\"capfac\"] > 1.2, \"capfac\"] = np.NaN\n",
"eia_cf.loc[eia_cf[\"capfac\"] < 0, \"capfac\"] = np.NaN\n",
"eia_cf"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"px.histogram(eia_cf, x=\"capfac\", nbins=15, histnorm=\"percent\", width=500).update_xaxes(dtick=0.05)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cems_cf = cems.merge(subplant_nameplate, how=\"left\", on=[\"plant_id_eia\",\"subplant_id\"])\n",
"cems_cf = cems_cf.groupby([\"plant_id_eia\",\"subplant_id\"])[[\"net_generation_mwh\",\"capacity_mw\"]].sum()\n",
"cems_cf = cems_cf[cems_cf[\"capacity_mw\"] > 0]\n",
"cems_cf['capfac'] = cems_cf['net_generation_mwh'] / cems_cf['capacity_mw']\n",
"cems_cf.loc[cems_cf[\"capfac\"] > 1.2, \"capfac\"] = np.NaN\n",
"cems_cf.loc[cems_cf[\"capfac\"] < 0, \"capfac\"] = np.NaN\n",
"cems_cf"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"px.histogram(cems_cf, x=\"capfac\", nbins=15, histnorm=\"percent\", width=500).update_xaxes(dtick=0.05)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4 ('open_grid_emissions')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "25e36f192ecdbe5da57d9bea009812e7b11ef0e0053366a845a2802aae1b29d2"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit bce51f7

Please sign in to comment.