## Production Amounts and Scope 1 and 2 Emission Values Per EAF Plant in the United States

### Loading Libraries

In [1]:
import pandas as pd
import numpy as np
import janitor
import geopandas as gpd
import mapclassify as mc
from shapely.geometry import Point

### Importing eGrid Data

The EPA eGRID data, which was available for 2021 at the time of download, contains the emissions intensity of electricity produced in each subregion. For this analysis, we assume that each steel plant draws 100% of its electricity from the grid (specifically, from the eGRID subregion in which it is located).

In [2]:
## Reading the "SRL21" sheet of the eGRID 2021 workbook (grid subregion electricity values).
## Column names are normalised with pyjanitor and the row labelled 0 is dropped
## (presumably a secondary header row in the sheet -- TODO confirm).
egrid2021_data = (
    pd.read_excel('../data/eGRID2021_data.xlsx', sheet_name="SRL21")
    .clean_names(strip_underscores=True)
    .drop(index=0)
)

### Standardizing state names and cleaning up rows in eGrid data

In [3]:
## Building a tidy per-subregion emissions table from the raw eGrid sheet

# Subregions left out of the analysis
excluded_subregions = ["AKGD", "AKMS", "HIMS", "HIOA", "PRMS"]

emissions_by_subregion = (
    egrid2021_data
    .assign(
        # Short aliases for the verbose eGrid column names
        subregion=lambda frame: frame['egrid_subregion_acronym'],
        subregion_name=lambda frame: frame['egrid_subregion_name'],
        # CO2e output emission rate, converted from lb/MWh to tonnes/MWh
        # (2204.62262185 lb per tonne)
        co2e_tonnes_per_mwh=lambda frame: pd.to_numeric(
            frame["egrid_subregion_annual_co2_equivalent_combustion_output_emission_rate_lb_mwh"]
        ) / 2204.62262185,
    )
    # Keep only the retained subregions and the three columns used downstream
    .loc[
        lambda frame: ~frame['subregion'].isin(excluded_subregions),
        ['subregion', 'subregion_name', 'co2e_tonnes_per_mwh'],
    ]
)

### Importing GEM data

In [4]:
## Reading the "Steel Plants" sheet of the GEM 2022 workbook and normalising
## its column names with pyjanitor
gem_data_readin = (
    pd.read_excel("../data/GEM_2022_data.xlsx", sheet_name="Steel Plants")
    .clean_names(strip_underscores=True)
)

### Filtering to only look at plants and data that we are interested in

In [5]:
## Restricting the GEM steel-plant table to operating U.S. plants with a reported
## EAF capacity that started before 2022, then reshaping it into the plant-level
## table (`gem_data`) used by the later cells.
## nominal_eaf_steel_capacity_ttpa is in thousand tonnes per year and we are
## standardizing to tonnes.
gem_data_cleaned = gem_data_readin.copy()

## Filtering to the specifications we need
gem_data_cleaned = gem_data_cleaned[
    (gem_data_cleaned['country'] == "United States")
    & (gem_data_cleaned['status'] == "operating")
    & gem_data_cleaned['nominal_eaf_steel_capacity_ttpa'].notna()
]

## start_date is filtered in a second step, on the already-reduced table.
## It is coerced to numeric first so that a non-numeric entry becomes NaN
## (and is filtered out) instead of raising a TypeError in the comparison.
start_year = pd.to_numeric(gem_data_cleaned['start_date'], errors='coerce')
gem_data_cleaned = gem_data_cleaned[
    (start_year < 2022)
    & (~gem_data_cleaned['plant_id'].isin(["SUS00009", "SUS00061"]))
]

gem_data_cleaned = gem_data_cleaned.rename(columns={
    'plant_name_english': 'plant_name',
    'subnational_unit_province_state': 'state',
    'location_address': 'address',
})

## Thousand tonnes per year -> tonnes per year
gem_data_cleaned = gem_data_cleaned.assign(
    max_tonnes_of_steel_producible_annually=(
        pd.to_numeric(gem_data_cleaned['nominal_eaf_steel_capacity_ttpa']) * 1000
    )
)

gem_data_cleaned = gem_data_cleaned[[
    'plant_id',
    'plant_name',
    'owner',
    'coordinates',
    'country',
    'state',
    'status',
    'start_date',
    'plant_age_years',
    'max_tonnes_of_steel_producible_annually',
    'municipality',
    'address',
    'category_steel_product',
    'steel_products',
    'responsiblesteel_certification',
]]

## Separate the "coordinates" column ("lat, lon" text) into numeric "lat" and
## "lon" columns. Stripping removes the whitespace left after the comma, and
## the numeric conversion keeps the coordinates from being carried as strings.
coordinate_parts = gem_data_cleaned['coordinates'].str.split(',', n=1, expand=True)

## Keeping only the columns needed downstream, in their final order
## ("coordinates", "country", "start_date", "status" and
## "responsiblesteel_certification" are dropped here)
gem_data = gem_data_cleaned.assign(
    lat=pd.to_numeric(coordinate_parts[0].str.strip()),
    lon=pd.to_numeric(coordinate_parts[1].str.strip()),
)[[
    'plant_id',
    'plant_name',
    'owner',
    'lat',
    'lon',
    'state',
    'plant_age_years',
    'max_tonnes_of_steel_producible_annually',
    'municipality',
    'address',
    'category_steel_product',
    'steel_products',
]]

In [27]:
# Reading in the eGRID subregion boundary shapes
subregion_shapes_raw = gpd.read_file("../data/egrid2020_subregions/eGRID2020_subregions.shp").clean_names()

# Filtering subregion shapes to the subregions kept in the emissions table.
# .copy() makes the filtered frame independent of subregion_shapes_raw, so the
# geometry assignment below no longer triggers SettingWithCopyWarning.
subregion_shapes = subregion_shapes_raw[
    ~subregion_shapes_raw['zipsubregi'].isin(excluded_subregions)
].copy()

# Simplifying subregion shapes
subregion_shapes['geometry'] = subregion_shapes.simplify(tolerance=0.0005)

# Bringing in plant points (lon is x, lat is y), declared as EPSG:4326
plant_points = gpd.GeoDataFrame(
    gem_data,
    geometry=gpd.points_from_xy(gem_data['lon'], gem_data['lat']),
    crs="EPSG:4326",
)

# Assigning each plant to the subregion polygon it falls within.
# `predicate` replaces the deprecated `op` keyword of gpd.sjoin.
plant_emissions_by_subregion = gpd.sjoin(plant_points, subregion_shapes, predicate='within')

# Dropping the geometry and shapefile bookkeeping columns, and naming the
# subregion column consistently with emissions_by_subregion
plant_emissions_by_subregion = (
    plant_emissions_by_subregion
    .drop(columns=['geometry', 'index_right', 'shape_leng', 'shape_le_1', 'shape_area'])
    .rename(columns={'zipsubregi': 'subregion'})
)

### Make sure the final dataset matches, because right now we are missing columns

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


In [29]:
# Inspecting which columns remain after the spatial join, column drop and rename
plant_emissions_by_subregion.columns

Index(['plant_id', 'plant_name', 'owner', 'lat', 'lon', 'state',
       'plant_age_years', 'max_tonnes_of_steel_producible_annually',
       'municipality', 'address', 'category_steel_product', 'steel_products',
       'subregion'],
      dtype='object')

In [30]:
# Displaying the subregion shapes as read from the shapefile (before filtering and simplifying)
subregion_shapes_raw

Unnamed: 0,zipsubregi,shape_leng,shape_le_1,shape_area,geometry
0,AKGD,122.099469,122.099469,36.714643,"MULTIPOLYGON (((-151.64962 59.11983, -151.6451..."
1,AKMS,804.533841,804.533841,243.100971,"MULTIPOLYGON (((-179.10754 51.30120, -179.1054..."
2,AZNM,85.346813,85.346813,54.991175,"MULTIPOLYGON (((-104.35820 33.30621, -104.3585..."
3,CAMX,75.567466,75.567466,35.788722,"MULTIPOLYGON (((-117.15658 32.66883, -117.1566..."
4,ERCT,147.354169,147.354169,49.825826,"MULTIPOLYGON (((-97.19964 26.00022, -97.19972 ..."
5,FRCC,147.119699,147.119699,12.279977,"MULTIPOLYGON (((-81.96339 24.52110, -81.96374 ..."
6,HIMS,15.387078,15.387078,1.30461,"MULTIPOLYGON (((-155.90856 19.18114, -155.9089..."
7,HIOA,3.859476,3.859476,0.135622,"MULTIPOLYGON (((-157.71549 21.29144, -157.7154..."
8,MROE,74.366089,74.366089,9.751795,"MULTIPOLYGON (((-87.83248 43.83403, -87.83289 ..."
9,MROW,118.469797,118.469797,109.918613,"MULTIPOLYGON (((-91.42487 40.58896, -91.42515 ..."
