Merge pull request #251 from singularity-energy/v0.1.2

v0.1.2
singularity-energy · Oct 26, 2022 · bce51f7 · bce51f7
2 parents 5f13239 + a761999
commit bce51f7
Show file tree

Hide file tree

Showing 20 changed files with 2,121 additions and 385 deletions.
diff --git a/data/manual/epa_eia_crosswalk_manual.csv b/data/manual/epa_eia_crosswalk_manual.csv
@@ -2,6 +2,8 @@ plant_id_epa,unitid,plant_id_eia,generator_id,notes
 389,3,389,3,CAMD_UNIT_ID matches EIA_GENERATOR_ID
 1363,7A,1363,7A,CAMD_UNIT_ID matches EIA_GENERATOR_ID
 1363,7B,1363,7B,CAMD_UNIT_ID matches EIA_GENERATOR_ID
+1393,1A,1393,1,CAMD_UNIT_ID matches boiler_id and reported fuel consumption matches
+1393,2A,1393,2,CAMD_UNIT_ID matches boiler_id and reported fuel consumption matches
 1393,3,1393,3,CAMD_GENERATOR_ID matches EIA_GENERATOR_ID
 1570,11,1570,11,CAMD_UNIT_ID matches EIA_GENERATOR_ID
 1570,9,1570,9,CAMD_UNIT_ID matches EIA_GENERATOR_ID

diff --git a/environment.yml b/environment.yml
@@ -28,6 +28,6 @@ dependencies:
 
   - pip:
       #- --editable ../pudl #NOTE: this is for Greg's use for development
-      - git+https://github.com/grgmiller/pudl.git@allocate_fuel#egg=catalystcoop.pudl
+      - git+https://github.com/grgmiller/pudl.git@oge#egg=catalystcoop.pudl
       # - --editable ../gridemissions # NOTE: for Gailin's development use
       - git+https://github.com/gailin-p/gridemissions#egg=gridemissions
diff --git a/notebooks/explore_data/explore_intermediate_outputs.ipynb b/notebooks/explore_data/explore_intermediate_outputs.ipynb
@@ -39,13 +39,11 @@
     "year = 2020\n",
     "path_prefix = f\"{year}/\"\n",
     "\n",
-    "cems = pd.read_csv(f'{outputs_folder()}{path_prefix}/cems_{year}.csv', dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
-    "partial_cems_scaled = pd.read_csv(f'{outputs_folder()}{path_prefix}/partial_cems_scaled_{year}.csv', dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
-    "eia923_allocated = pd.read_csv(f'{outputs_folder()}{path_prefix}/eia923_allocated_{year}.csv', dtype=get_dtypes(), parse_dates=['report_date'])\n",
-    "plant_attributes = pd.read_csv(f\"{outputs_folder()}{path_prefix}/plant_static_attributes_{year}.csv\")\n",
-    "primary_fuel_table = plant_attributes.drop_duplicates(subset=\"plant_id_eia\")[[\"plant_id_eia\", \"plant_primary_fuel\"]]\n",
-    "residual_profiles = pd.read_csv(f\"{outputs_folder()}{path_prefix}/residual_profiles_{year}.csv\")\n",
-    "shaped_eia_data = pd.read_csv(f\"{outputs_folder()}{path_prefix}/shaped_eia923_data_{year}.csv\")"
+    "cems = pd.read_csv(outputs_folder(f\"{path_prefix}/cems_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
+    "partial_cems_plant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_plant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
+    "partial_cems_subplant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_subplant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
+    "eia923_allocated = pd.read_csv(outputs_folder(f\"{path_prefix}/eia923_allocated_{year}.csv\"), dtype=get_dtypes(), parse_dates=['report_date'])\n",
+    "plant_attributes = pd.read_csv(outputs_folder(f\"{path_prefix}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes())"
    ]
   },
   {

diff --git a/notebooks/explore_data/gens_not_in_cems.ipynb b/notebooks/explore_data/gens_not_in_cems.ipynb
@@ -0,0 +1,257 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import packages\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import plotly.express as px\n",
+    "\n",
+    "%reload_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "# # Tell python where to look for modules.\n",
+    "import sys\n",
+    "sys.path.append('../../../open-grid-emissions/src/')\n",
+    "\n",
+    "import download_data\n",
+    "import load_data\n",
+    "from column_checks import get_dtypes\n",
+    "from filepaths import *\n",
+    "import impute_hourly_profiles\n",
+    "import data_cleaning\n",
+    "import output_data\n",
+    "import emissions\n",
+    "import validation\n",
+    "import gross_to_net_generation\n",
+    "import eia930\n",
+    "\n",
+    "year = 2020\n",
+    "path_prefix = f\"{year}/\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load inputs to function\n",
+    "cems = pd.read_csv(outputs_folder(f\"{path_prefix}/cems_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
+    "partial_cems_plant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_plant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
+    "partial_cems_subplant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_subplant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
+    "eia923_allocated = pd.read_csv(outputs_folder(f\"{path_prefix}/eia923_allocated_{year}.csv\"), dtype=get_dtypes(), parse_dates=['report_date'])\n",
+    "plant_attributes = pd.read_csv(outputs_folder(f\"{path_prefix}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes())\n",
+    "\n",
+    "# select eia only data \n",
+    "eia_only_data = eia923_allocated[\n",
+    "    (eia923_allocated[\"hourly_data_source\"] == \"eia\")\n",
+    "    & ~(eia923_allocated[\"fuel_consumed_mmbtu\"].isna())\n",
+    "].copy()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Why are NOx emissions from non-CEMS plants so high in CAISO?\n",
+    "According to the data source, about 70% of emitting generation is represented in CEMS, but only 4% of NOx emissions are. Do 30% of plants account for 96% of NOx emissions?\n",
+    "\n",
+    "Are we over-counting NOx from non-cems plants?\n",
+    "Is there a lot of missing nox data for CEMS plants?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add ba codes and plant primary fuel to all of the data\n",
+    "eia_only_data = eia_only_data.merge(\n",
+    "    plant_attributes[[\"plant_id_eia\", \"ba_code\", \"plant_primary_fuel\"]],\n",
+    "    how=\"left\",\n",
+    "    on=\"plant_id_eia\",\n",
+    "    validate=\"m:1\",\n",
+    ")\n",
+    "cems = cems.merge(\n",
+    "    plant_attributes[[\"plant_id_eia\", \"ba_code\", \"plant_primary_fuel\"]],\n",
+    "    how=\"left\",\n",
+    "    on=\"plant_id_eia\",\n",
+    "    validate=\"m:1\",\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cems_caiso = cems[cems[\"ba_code\"] == \"CISO\"].copy()\n",
+    "eia_caiso = eia_only_data[eia_only_data[\"ba_code\"] == \"CISO\"].copy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cems_caiso[\"nox_mass_lb_for_electricity\"].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eia_caiso[\"nox_mass_lb_for_electricity\"].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eia_caiso[\"nox_rate\"] = eia_caiso[\"nox_mass_lb_for_electricity\"] / eia_caiso[\"net_generation_mwh\"]\n",
+    "eia_caiso[\"nox_rate\"] = eia_caiso[\"nox_rate\"].replace(np.inf, np.nan)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eia_caiso.groupby([\"prime_mover_code\",\"energy_source_code\",])[\"nox_mass_lb_for_electricity\"].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eia_caiso.groupby([\"prime_mover_code\"])[\"nox_mass_lb_for_electricity\"].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eia_caiso.groupby([\"energy_source_code\"])[\"nox_mass_lb_for_electricity\"].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eia_caiso[\"nox_mass_lb_for_electricity\"].sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Investigate the capacity factor of plants in each dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subplant_nameplate = gross_to_net_generation.calculate_subplant_nameplate_capacity(year)\n",
+    "\n",
+    "pudl_out = load_data.initialize_pudl_out(year)\n",
+    "gen_cap = pudl_out.gens_eia860()[[\"plant_id_eia\",\"generator_id\",\"capacity_mw\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eia_cf = eia_only_data.merge(gen_cap, how=\"left\", on=[\"plant_id_eia\",\"generator_id\"], validate=\"m:1\")\n",
+    "eia_cf[\"capfac\"] = eia_cf.net_generation_mwh / (eia_cf.report_date.dt.days_in_month * 24 * eia_cf.capacity_mw)\n",
+    "eia_cf.loc[eia_cf[\"capfac\"] > 1.2, \"capfac\"] = np.NaN\n",
+    "eia_cf.loc[eia_cf[\"capfac\"] < 0, \"capfac\"] = np.NaN\n",
+    "eia_cf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "px.histogram(eia_cf, x=\"capfac\", nbins=15, histnorm=\"percent\", width=500).update_xaxes(dtick=0.05)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cems_cf = cems.merge(subplant_nameplate, how=\"left\", on=[\"plant_id_eia\",\"subplant_id\"])\n",
+    "cems_cf = cems_cf.groupby([\"plant_id_eia\",\"subplant_id\"])[[\"net_generation_mwh\",\"capacity_mw\"]].sum()\n",
+    "cems_cf = cems_cf[cems_cf[\"capacity_mw\"] > 0]\n",
+    "cems_cf['capfac'] = cems_cf['net_generation_mwh'] / cems_cf['capacity_mw']\n",
+    "cems_cf.loc[cems_cf[\"capfac\"] > 1.2, \"capfac\"] = np.NaN\n",
+    "cems_cf.loc[cems_cf[\"capfac\"] < 0, \"capfac\"] = np.NaN\n",
+    "cems_cf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "px.histogram(cems_cf, x=\"capfac\", nbins=15, histnorm=\"percent\", width=500).update_xaxes(dtick=0.05)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10.4 ('open_grid_emissions')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "25e36f192ecdbe5da57d9bea009812e7b11ef0e0053366a845a2802aae1b29d2"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}