## Production Amounts and Scope 1 and 2 Emission Values Per EAF Plant in the United States

### Loading Libraries

In [23]:
import pandas as pd
import numpy as np
import janitor

NameError: name 'notebook' is not defined

### Importing eGrid Data

The EPA eGrida Data, which was available for 2021 at the time of download, contains the emissions intensity of electricity produced in each sub region. For this analysis, we assume that each steel plant is drawing 100% of its electricity from the grid (and the eGrid subregion that it is located within).

In [46]:
## Importing grid region electricity values
egrid2021_data = pd.read_excel('../data/eGRID2021_data.xlsx', sheet_name = "SRL21").clean_names(strip_underscores = True).drop(0)

### Standardizing state names and cleaning up rows in eGrid data

In [47]:
## Cleaning egrid data for usability
emissions_by_subregion = egrid2021_data.copy()

emissions_by_subregion["co2e_lbs_per_mwh"] = pd.to_numeric(emissions_by_subregion["egrid_subregion_annual_co2_equivalent_combustion_output_emission_rate_lb_mwh"])
emissions_by_subregion['subregion'] = emissions_by_subregion['egrid_subregion_acronym']
emissions_by_subregion['subregion_name'] = emissions_by_subregion['egrid_subregion_name']
emissions_by_subregion['co2e_tonnes_per_mwh'] = emissions_by_subregion['co2e_lbs_per_mwh'] / 2204.62262185

# Filtering subregions
excluded_subregions = ["AKGD", "AKMS", "HIMS", "HIOA", "PRMS"]
emissions_by_subregion = emissions_by_subregion[~emissions_by_subregion['subregion'].isin(excluded_subregions)]

# Selecting desired columns
emissions_by_subregion = emissions_by_subregion[['subregion', 'subregion_name', 'co2e_tonnes_per_mwh']]

### Importing GEM data

In [51]:
gem_data_readin = pd.read_excel("../data/GEM_2022_data.xlsx", sheet_name = "Steel Plants").clean_names(strip_underscores = True)

### Filtering to only look at plants and data that we are interested in

In [62]:
## eaf_capacity is in thousand tonnes per year and we are standardizing to tonnes
gem_data_cleaned = gem_data_readin.copy()

## Filtering to the specifications we need
gem_data_cleaned = gem_data_cleaned[
       (gem_data_cleaned['country'] == "United States") &
       (gem_data_cleaned["status"] == "operating") &
       (gem_data_cleaned['nominal_eaf_steel_capacity_ttpa'] != "NaN")
]

## Needed to do this in another step to make sure start_date was properly filtered
gem_data_cleaned = gem_data_cleaned[
       (gem_data_cleaned['start_date'] < 2022) &
       (~gem_data_cleaned['plant_id'].isin(["SUS00009", "SUS00061"]))
]

""""
mutate(plant_name = plant_name_english
         , state = subnational_unit_province_state
         , eaf_capacity = as.numeric(nominal_eaf_steel_capacity_ttpa)
         , address = location_address
         , max_tonnes_of_steel_producible_annually = eaf_capacity * 1000) %>%
  select(-eaf_capacity) %>% 
  select(plant_id
         , plant_name
         , owner
         , coordinates
         , country
         , state
         , status
         , start_date
         , plant_age_years
         , max_tonnes_of_steel_producible_annually
         , municipality
         , address
         , category_steel_product
         , steel_products
         , responsible_steel_certification)

## Only selecting columns we need
gem_data <- gem_data_cleaned %>% 
  select(-c(country
         , start_date
         , status
         , responsible_steel_certification)) %>% 
  separate(coordinates, c("lat", "lon"), remove = TRUE, ",")

"""

'"\n  filter(country == "United States"\n          %>% \n  mutate(plant_name = plant_name_english\n         , state = subnational_unit_province_state\n         , eaf_capacity = as.numeric(nominal_eaf_steel_capacity_ttpa)\n         , address = location_address\n         , max_tonnes_of_steel_producible_annually = eaf_capacity * 1000) %>%\n  select(-eaf_capacity) %>% \n  select(plant_id\n         , plant_name\n         , owner\n         , coordinates\n         , country\n         , state\n         , status\n         , start_date\n         , plant_age_years\n         , max_tonnes_of_steel_producible_annually\n         , municipality\n         , address\n         , category_steel_product\n         , steel_products\n         , responsible_steel_certification)\n\n## Only selecting columns we need\ngem_data <- gem_data_cleaned %>% \n  select(-c(country\n         , start_date\n         , status\n         , responsible_steel_certification)) %>% \n  separate(coordinates, c("lat", "lon"), remove 