## Production Amounts and Scope 1 and 2 Emission Values Per EAF Plant in the United States

### Loading Libraries

In [1]:
import pandas as pd  ## Data manipulation
import numpy as np  ## Numerical computing
import janitor ## Cleaning column names
import geopandas as gpd ## Reading in spatial data
import re ## Regex

### Importing eGrid Data

The EPA eGRID data, which was available for 2021 at the time of download, contains the emissions intensity of electricity produced in each subregion. For this analysis, we assume that each steel plant draws 100% of its electricity from the grid (specifically, from the eGRID subregion in which it is located).

In [2]:
## Loading the "SRL21" sheet of the eGRID 2021 workbook
## clean_names() standardizes the column headers; drop(0) removes the row labelled 0
## (presumably a secondary header row in the workbook - confirm against the file)
egrid2021_sheet = pd.read_excel('../data/eGRID2021_data.xlsx', sheet_name = "SRL21")
egrid2021_data = egrid2021_sheet.clean_names(strip_underscores = True).drop(0)

### Standardizing state names and cleaning up rows in eGrid data

In [3]:
## Building a three-column lookup: subregion acronym, subregion name, and grid intensity
## egrid2021_data itself is only read here, never modified

## Subregions outside the continental US that are left out of the analysis
excluded_subregions = ["AKGD", "AKMS", "HIMS", "HIOA", "PRMS"]

## The eGRID rate is in lb CO2e per MWh; dividing by 2204.62262185 (lb per tonne) gives tonnes per MWh
co2e_lbs_per_mwh = pd.to_numeric(
    egrid2021_data["egrid_subregion_annual_co2_equivalent_combustion_output_emission_rate_lb_mwh"]
)

emissions_by_subregion = pd.DataFrame({
    'subregion': egrid2021_data['egrid_subregion_acronym'],
    'subregion_name': egrid2021_data['egrid_subregion_name'],
    'co2e_tonnes_per_mwh': co2e_lbs_per_mwh / 2204.62262185,
})

## Keeping only the rows whose subregion is not in the exclusion list
is_excluded_subregion = emissions_by_subregion['subregion'].isin(excluded_subregions)
emissions_by_subregion = emissions_by_subregion[~is_excluded_subregion]

### Importing GEM data

The GEM data was provided by our client Caitlin Swalec and includes the plant names, locations, and plant capacities that we will be reviewing for the EAF steel plants.

In [4]:
## Loading the "Steel Plants" sheet of the GEM 2022 workbook, then standardizing the column names
gem_steel_plants_sheet = pd.read_excel("../data/GEM_2022_data.xlsx", sheet_name = "Steel Plants")
gem_data_readin = gem_steel_plants_sheet.clean_names(strip_underscores = True)

### Filtering to only look at plants and data that we are interested in

In [5]:
## Working on a copy so the raw GEM read-in stays untouched
gem_data_cleaned = gem_data_readin.copy()

## First pass: operating plants in the United States that report an EAF capacity
in_united_states = gem_data_cleaned['country'] == "United States"
is_operating = gem_data_cleaned["status"] == "operating"
reports_eaf_capacity = gem_data_cleaned['nominal_eaf_steel_capacity_ttpa'].notna()
gem_data_cleaned = gem_data_cleaned[in_united_states & is_operating & reports_eaf_capacity]

## Second pass, done separately to make sure start_date was properly filtered
## SUS00009 is not currently operating their EAF
## SUS00061 has gone on strike for a long time so they are no longer operating
plants_not_running = ["SUS00009", "SUS00061"]
started_before_2022 = gem_data_cleaned['start_date'] < 2022
is_not_running = gem_data_cleaned['plant_id'].isin(plants_not_running)
gem_data_cleaned = gem_data_cleaned[started_before_2022 & ~is_not_running]

## Shorter column names
gem_data_cleaned = gem_data_cleaned.rename(columns={
    'plant_name_english': 'plant_name',
    'subnational_unit_province_state': 'state',
    'location_address': 'address',
})

## nominal_eaf_steel_capacity_ttpa is in thousand tonnes per year; multiplying by 1000 gives tonnes
eaf_capacity_ttpa = pd.to_numeric(gem_data_cleaned['nominal_eaf_steel_capacity_ttpa'])
gem_data_cleaned['max_tonnes_of_steel_producible_annually'] = eaf_capacity_ttpa * 1000

## Keeping the columns of interest, in the desired order
gem_data_cleaned = gem_data_cleaned[[
    'plant_id',
    'plant_name',
    'owner',
    'coordinates',
    'country',
    'state',
    'status',
    'start_date',
    'plant_age_years',
    'max_tonnes_of_steel_producible_annually',
    'municipality',
    'address',
    'category_steel_product',
    'steel_products',
    'responsiblesteel_certification',
]]

## Splitting "coordinates" on the comma into its two parts
## The parts are kept as text: no numeric conversion is applied to lat / lon here
coordinate_parts = gem_data_cleaned['coordinates'].str.split(',', expand=True)

## gem_data is a separate frame without the filter-only columns and without "coordinates"
gem_data = gem_data_cleaned.drop(columns=[
    'country', 'start_date', 'status', 'responsiblesteel_certification', 'coordinates',
])
gem_data[['lat', 'lon']] = coordinate_parts

## Final column order
gem_data = gem_data[[
    'plant_id', 'plant_name', 'owner', 'lat', 'lon', 'state', 'plant_age_years',
    'max_tonnes_of_steel_producible_annually', 'municipality', 'address',
    'category_steel_product', 'steel_products',
]]

The eGRID subregion shapefile loaded below contains the boundaries of the electric grid subregions that we will be looking at. We find which subregion each plant's point falls within and assign that subregion to the plant.

In [6]:
## Reading in the eGRID subregion polygons
subregion_shapes_raw = gpd.read_file("../data/egrid2020_subregions/eGRID2020_subregions.shp").clean_names()

## Filtering subregion shapes to drop the same five subregions excluded from the eGRID table
## .copy() makes the filtered frame independent of subregion_shapes_raw, so the geometry
## assignment below is not a write to a slice (the cause of SettingWithCopyWarning)
subregion_shapes = subregion_shapes_raw[~subregion_shapes_raw['zipsubregi'].isin(["AKGD", "AKMS", "HIMS", "HIOA", "PRMS"])].copy()

## Simplifying subregion shapes
## tolerance is expressed in the units of the shapefile's CRS (presumably degrees - confirm)
subregion_shapes['geometry'] = subregion_shapes.simplify(tolerance=0.0005)

## Bringing in plant points: x is longitude, y is latitude
plant_points = gpd.GeoDataFrame(gem_data, geometry=gpd.points_from_xy(gem_data['lon'], gem_data['lat']))
plant_points.crs = "EPSG:4326"

## Finding points that overlap with the egrid subregion
## Removing columns we dont need and renaming the subregion column
## NOTE(review): newer geopandas releases name this argument `predicate` instead of `op`;
## switch when the environment is upgraded - confirm against the installed version
plant_emissions_by_subregion = gpd.sjoin(plant_points, subregion_shapes, op = 'within').drop(columns = ['geometry', 'index_right', 'shape_leng', 'shape_le_1', 'shape_area']).rename(columns={'zipsubregi':'subregion'})

## Combining our data based off of matching subregions
## Left join: every plant is kept even if its subregion has no row in emissions_by_subregion
plant_emissions_by_subregion = plant_emissions_by_subregion.merge(emissions_by_subregion, on = 'subregion', how = 'left')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


### Importing AISI data

The American Iron and Steel Institute (AISI) is a trade association that represents the North American steel industry. It gathers and provides data related to steel production, consumption, trade, and other industry metrics. The AISI data covers various aspects of the steel industry, including information on steel production volumes, capacities, and utilization rates.

We will be using these utilization rates in order to estimate the amount of steel produced per plant per week and per year.

In [7]:
## State-to-region lookup from the "Regions by State" sheet
AISI_regions_sheet = pd.read_excel("../data/AISI_regions.xlsx", sheet_name="Regions by State")
AISI_regions_readin = AISI_regions_sheet.clean_names(strip_underscores = True)

## Production figures from the "AISI Production Values" sheet
AISI_production_sheet = pd.read_excel("../data/AISI_data.xlsx", sheet_name="AISI Production Values")
AISI_data_readin = AISI_production_sheet.clean_names(strip_underscores = True)

Region Abbreviations:

NE = Northeast

GL = Great Lakes

MW = Midwest

S = Southern

W = Western

### Filtering AISI data, renaming columns, and selecting the weekly data and utilization rates we need

**Utilization is based on tonnage capability to produce raw steel for a sustained full order book.**

In [8]:
## Filtering by "yes" to find plants that are in the data
AISI_regions = AISI_regions_readin[AISI_regions_readin['steel_plant_in_gspt'] == "yes"]

## Renaming columns for clarity
AISI_data = AISI_data_readin.rename(columns = {'north_east_region_capacity_utilization':'NE_util'
                                          , 'great_lakes_region_capacity_utilization':'GL_util'
                                          , 'midwest_region_capacity_utilization':'MW_util'
                                          , 'southern_region_capacity_utilization':'S_util'
                                          , 'western_region_capacity_utilization':'W_util'})

## Keeping the week-ending date and the five regional utilization columns
## .copy() gives an independent frame so the date column can be reassigned below
AISI_data = AISI_data[['week_end_date', 'NE_util', 'GL_util', 'MW_util', 'S_util', 'W_util']].copy()

## Making week_end_date column into datetime BEFORE filtering,
## so the cutoff below is a true date comparison whatever type the column was read in as
AISI_data['week_end_date'] = pd.to_datetime(AISI_data['week_end_date'])

## Only looking at weeks ending on or before January 1, 2022
AISI_data = AISI_data[AISI_data['week_end_date'] <= pd.Timestamp('2022-01-01')].copy()

## Shortening date to remove timestamps
AISI_data['week_end_date'] = AISI_data['week_end_date'].dt.date

### Merging all of our data so far

In [9]:
## Attaching the AISI region of each plant through its state (left join keeps every plant)
regional_plant_emissions = pd.merge(plant_emissions_by_subregion, AISI_regions, how = "left", on = "state")

## The lookup's helper columns are not needed past this point
regional_plant_emissions = regional_plant_emissions.drop(columns = ['steel_plant_in_gspt', 'state_abbreviation'])

## Region overrides by municipality: plants in the listed cities get the region named by the key,
## every other plant keeps the region that came from the state merge
region_by_municipality = {
    "Midwest": ["Alton", "Sterling", "Peoria", "Granite City", "Mansfield", "Middletown"],
    "Great Lakes": ["Riverdale", "Chicago", "Bourbonnais", "Cuyahoga Heights", "Cleveland", "Toledo", "Lorain"],
    "North East": ["Mingo Junction", "Youngstown", "Canton"],
}

choices = list(region_by_municipality.keys())
conditions = [
    regional_plant_emissions['municipality'].isin(municipalities)
    for municipalities in region_by_municipality.values()
]

regional_plant_emissions['region'] = np.select(conditions, choices, default=regional_plant_emissions['region'])

### Defining carbon intensity for eaf steel production and preparing data to calculate intensities

In [10]:
## co2e_tonnes_per_mwh is from eGrid and is tonnes of CO2e per MWH of electricity produced per grid location (not regional location)

## Global Efficiency Intelligence states that it takes 710 KWH to produce 1 tonne of steel. "Global Efficiency Intelligence: Industrial Electrification in U.S. States"
## MWH per tonne of steel (710 KWH / 1000 KWH per MWH)
eaf_MWH_per_tonne = 710 / 1000

## emissions_intensity is tonnes of CO2e per tonne of steel (the amount of co2e produced for every tonne of steel produced)

## Reshaping to long format: one row per (week_end_date, region) with its utilization value
AISI_longer = AISI_data.melt(id_vars= 'week_end_date'
                           , value_vars = ['NE_util', 'GL_util', 'MW_util', 'S_util', 'W_util']
                           , var_name = 'region'
                           , value_name = 'utilization')

## Translating the utilization column labels into the region names used on the plant data
region_conditions = [
  AISI_longer['region'] == "NE_util"
  , AISI_longer['region'] == "GL_util"
  , AISI_longer['region'] == "MW_util"
  , AISI_longer['region'] == "S_util"
  , AISI_longer['region'] == "W_util"
]

region_choices = ["North East", "Great Lakes", "Midwest", "Southern", "Western"]

## An explicit string default is passed so np.select never has to combine the string
## choices with its implicit integer default of 0; a label that matches no condition
## keeps its original value instead of becoming 0
AISI_longer['region'] = np.select(region_conditions, region_choices, default=AISI_longer['region'])

### Finding Weekly Scope 2 co2e values and putting it in long and wide format

In [11]:
## Pairing every plant with each weekly utilization reading of its region
scope2_plant_emissions_long = pd.merge(regional_plant_emissions, AISI_longer, on = "region", how = "left")

## Creating relevant columns
## intensity       = grid tonnes CO2e per MWh * MWh needed per tonne of steel
## weekly capacity = annual capacity / 52
## weekly scope 2  = utilization * weekly capacity * intensity
scope2_intensity = scope2_plant_emissions_long['co2e_tonnes_per_mwh'] * eaf_MWH_per_tonne
weekly_capacity = scope2_plant_emissions_long['max_tonnes_of_steel_producible_annually'] / 52
weekly_scope2_co2e = scope2_plant_emissions_long['utilization'] * weekly_capacity * scope2_intensity

scope2_plant_emissions_long['estimated_emissions_intensity_tonne_per_tonne_scope2'] = scope2_intensity
scope2_plant_emissions_long['max_tonnes_of_steel_producible_weekly'] = weekly_capacity
scope2_plant_emissions_long['scope2_co2e_tonnes_per_week'] = weekly_scope2_co2e

## Column order of the long table
scope2_long_columns = [
    "plant_id",
    "plant_name",
    "owner",
    "lat",
    "lon",
    "state",
    "plant_age_years",
    "municipality",
    "address",
    "category_steel_product",
    "steel_products",
    "subregion",
    "subregion_name",
    "co2e_tonnes_per_mwh",
    "region",
    "week_end_date",
    "utilization",
    "estimated_emissions_intensity_tonne_per_tonne_scope2",
    "max_tonnes_of_steel_producible_annually",
    "max_tonnes_of_steel_producible_weekly",
    "scope2_co2e_tonnes_per_week",
]
scope2_plant_emissions_long = scope2_plant_emissions_long[scope2_long_columns]

## Rounding the data
scope2_plant_emissions_long_rounded = scope2_plant_emissions_long.round(2)

## Transferring each week to be its own column
## The row identifiers are all long-table columns except the three that vary by week
scope2_week_varying_columns = ("week_end_date", "utilization", "scope2_co2e_tonnes_per_week")
scope2_wide_index_columns = [
    column for column in scope2_long_columns if column not in scope2_week_varying_columns
]

scope2_plant_emissions_wide = (
    scope2_plant_emissions_long_rounded
    .drop(columns = ['utilization'])
    .pivot(
        index = scope2_wide_index_columns,
        columns = 'week_end_date',
        values = 'scope2_co2e_tonnes_per_week',
    )
    .reset_index()
)


  uniques = Index(uniques)


### Finding Scope 2 values for the complete 2021 year

In [12]:
## Adding up the weekly scope 2 estimates into one total per plant, rounded to 3 decimals
plant_identity_columns = ['plant_id', 'plant_name', 'address', 'lat', 'lon']

yearly_scope2 = (
    scope2_plant_emissions_long
    .groupby(plant_identity_columns)['scope2_co2e_tonnes_per_week']
    .sum()
    .reset_index(name = 'scope2_co2e_tonnes_2021')
    .round(3)
)

### Extracting zip codes to find matches with addresses

In [13]:
## Making a copy of yearly_scope2 to work with
zipcode_extraction = yearly_scope2.copy()

## Separate the addresses based off of commas
## reindex pads (or trims) the split to exactly five positional parts, so this step works
## whatever the largest number of commas in the addresses is
## astype(object) keeps a padded, all-missing part usable with the .str accessor
address_parts = (
    zipcode_extraction['address']
    .str.split(',', expand = True)
    .reindex(columns = range(5))
    .astype(object)
)

## The first and fifth parts are not used
## Extract only the numeric portions (first run of digits) of the second, third and fourth parts;
## a part without any digits becomes NA
part2_digits = address_parts[1].str.extract(r'(\d+)', expand = False)
part3_digits = address_parts[2].str.extract(r'(\d+)', expand = False)
part4_digits = address_parts[3].str.extract(r'(\d+)', expand = False)

## Create zip_code column by selecting the first non-null value from part 2, then part 3, then part 4
## pd.to_numeric stores the zip code as a number, so a leading zero is not kept
zipcode_extraction['zip_code'] = pd.to_numeric(part2_digits.fillna(part3_digits).fillna(part4_digits))

## Finding which zip codes only occur once and extracting those
checking_unique_zipcodes_scope2 = zipcode_extraction.groupby('zip_code').size().reset_index(name = 'count')
checking_unique_zipcodes_scope2 = checking_unique_zipcodes_scope2[checking_unique_zipcodes_scope2['count'] < 2].dropna().reset_index()
checking_unique_zipcodes_scope2 = checking_unique_zipcodes_scope2[['zip_code']]

## Combining these zip codes back to the original data in order to only merge based off of the unique zip codes we found
unique_scope2_zipcodes = pd.merge(checking_unique_zipcodes_scope2, zipcode_extraction, how = "left", on = "zip_code")

### Reading in Greenhouse Gas Emissions data (scope 1) and extracting the non-repeating zip codes

The Greenhouse Gas Emissions data we are using reports emissions, in CO2 equivalents (CO2e), from steel plants in the United States. We assume that this data is scope 1 and does not include any scope 2 sources. This data includes steel plant names, companies, locations, and emissions data.

In [14]:
## Loading the scope 1 workbook: the first 5 rows are skipped, blank cells and 0 are read as NA
scope1_sheet = pd.read_excel("../data/GHG_flight_scope1.xls", skiprows = 5, na_values = ['', 0])
scope1_readin = scope1_sheet.clean_names(strip_underscores = True)

## Making 0 values to NA to align with R code. Python was not reading them in as NA to begin with
ghg_quantity = scope1_readin['ghg_quantity_metric_tons_co2e']
scope1_readin['ghg_quantity_metric_tons_co2e'] = ghg_quantity.replace(0, np.nan)

## Only fully populated rows of the three columns used for matching are kept
scope1 = scope1_readin[['zip_code', 'ghg_quantity_metric_tons_co2e', 'ghgrp_id']].dropna()

## Finding which zip codes only occur once and extracting those
rows_per_zip_code = scope1.groupby('zip_code').size()
checking_unique_zipcodes_scope1 = rows_per_zip_code[rows_per_zip_code < 2].reset_index()[['zip_code']]

## Combining these zip codes back to the original data in order to only merge based off of the unique zip codes we found
unique_scope1_zipcodes = checking_unique_zipcodes_scope1.merge(scope1, on = "zip_code", how = "left")

### Joining scope 1 and scope 2 unique zip codes

In [15]:
## Pairing scope 1 and scope 2 rows whose zip code is unique on both sides
## These zip codes, addresses and plant names were manually checked and are for sure a match
zipcode_matched_rows = pd.merge(unique_scope1_zipcodes, unique_scope2_zipcodes, how = "inner", on = "zip_code")

scope_1_and_2_zipcode_matches_all_columns = (
    zipcode_matched_rows
    .dropna()
    .rename(columns = {'ghg_quantity_metric_tons_co2e': 'scope1_co2e_tonnes_2021'})
)

## Only the matched zip codes themselves, used later to find the plants still unmatched
scope_1_and_2_zipcode_matches = scope_1_and_2_zipcode_matches_all_columns.reindex(columns=["zip_code"])

### Anti-Joining to find remaining zip codes that had not matched and manually matching them

In [16]:
## Anti-Joining to find remaining zip codes that had not matched and manually matching them
## indicator=True adds a _merge column; 'left_only' marks plants with no zip-code match
remaining_zipcodes = pd.merge(zipcode_extraction, scope_1_and_2_zipcode_matches, on = "zip_code", how = "left", indicator = True)
remaining_zipcodes = remaining_zipcodes[remaining_zipcodes['_merge'] == 'left_only'].drop(columns='_merge')

corrected_ghgids = remaining_zipcodes.copy()

## Correctly matching the zip codes and assigning the plant id equivalents between both datasets
## Key: GEM plant_id, value: matching ghgrp_id in the scope 1 data
ghgrp_id_by_plant_id = {
    "SUS00002": 1003268,
    "SUS00007": 1003668,
    "SUS00015": 1000394,
    "SUS00042": 1004616,
    "SUS00019": 1002977,
    "SUS00025": 1007642,
    "SUS00032": 1007921,
    "SUS00058": 1007348,
    "SUS00029": 1005700,
}

## A plant that is not in the mapping gets a missing ghgrp_id (and therefore missing scope 1
## values after the left join below) instead of carrying its GEM plant_id into a numeric id column
corrected_ghgids['ghgrp_id'] = pd.to_numeric(remaining_zipcodes['plant_id'].map(ghgrp_id_by_plant_id))

## Attaching the scope 1 quantity through ghgrp_id; both zip_code copies created by the merge are dropped
corrected_ghgids = pd.merge(corrected_ghgids, scope1, how = "left", on = "ghgrp_id").drop(columns = ['zip_code_x', 'zip_code_y']).rename(columns = {'ghg_quantity_metric_tons_co2e':'scope1_co2e_tonnes_2021'})

### Combining all of our data to get a final dataset with scope 1 and 2 2021 values

In [17]:
## Bringing together the zip-code matches and the manually matched plants
## (outer merge on every column the two tables share), then dropping zip_code
all_matched_plants = pd.merge(scope_1_and_2_zipcode_matches_all_columns, corrected_ghgids, how = "outer")
scope_1_and_2_emissions_2021_prestep = all_matched_plants.drop(columns = 'zip_code')

## Finding the total amount of emissions per plant per year (scope 1 + scope 2)
scope_1_and_2_emissions_2021_prestep['total_co2e_tonnes_2021'] = (
    scope_1_and_2_emissions_2021_prestep['scope1_co2e_tonnes_2021']
    + scope_1_and_2_emissions_2021_prestep['scope2_co2e_tonnes_2021']
)

## Re-attaching the plant descriptors from the weekly table; the week-level columns are dropped
## and the repeated rows collapsed so that one row per plant is left
week_level_columns = [
    'week_end_date',
    'utilization',
    'estimated_emissions_intensity_tonne_per_tonne_scope2',
    'max_tonnes_of_steel_producible_weekly',
    'scope2_co2e_tonnes_per_week',
]

scope_1_and_2_emissions_2021 = (
    pd.merge(scope_1_and_2_emissions_2021_prestep, scope2_plant_emissions_long, how = "left")
    .drop(columns = week_level_columns)
    .drop_duplicates()
    .round(3)
)

### Adding on the matched columns and id numbers onto our scope 2 data

In [18]:
## Finding weekly scope 2 values first
## The matched plants (which carry ghgrp_id) are joined to the weekly table on their shared columns;
## the annual totals are not needed in the weekly output
annual_total_columns = ['total_co2e_tonnes_2021', 'scope1_co2e_tonnes_2021', 'scope2_co2e_tonnes_2021']

weekly_scope2_plant_emissions_long = (
    pd.merge(scope_1_and_2_emissions_2021_prestep, scope2_plant_emissions_long, how = "left")
    .drop(columns = annual_total_columns)
    .round(3)
)

## Column order of the long weekly scope 2 table
weekly_scope2_long_columns = [
    "plant_id",
    "ghgrp_id",
    "plant_name",
    "lat",
    "lon",
    "address",
    "owner",
    "state",
    "plant_age_years",
    "municipality",
    "category_steel_product",
    "steel_products",
    "subregion",
    "subregion_name",
    "region",
    "week_end_date",
    "utilization",
    "co2e_tonnes_per_mwh",
    "estimated_emissions_intensity_tonne_per_tonne_scope2",
    "max_tonnes_of_steel_producible_annually",
    "max_tonnes_of_steel_producible_weekly",
    "scope2_co2e_tonnes_per_week",
]
weekly_scope2_plant_emissions_long = weekly_scope2_plant_emissions_long[weekly_scope2_long_columns]

## Wide version: one column per week
## The row identifiers are all long-table columns except the three that vary by week
weekly_scope2_week_varying = ("week_end_date", "utilization", "scope2_co2e_tonnes_per_week")
weekly_scope2_wide_index = [
    column for column in weekly_scope2_long_columns if column not in weekly_scope2_week_varying
]

weekly_scope2_plant_emissions_wide = (
    weekly_scope2_plant_emissions_long
    .drop(columns = ['utilization'])
    .pivot(
        index = weekly_scope2_wide_index,
        columns = 'week_end_date',
        values = 'scope2_co2e_tonnes_per_week',
    )
    .reset_index()
)

  uniques = Index(uniques)


### Making our annual 2021 data into weekly to see breakdown of weekly co2e productions that include scope 1 AND scope 2

In [19]:
## Joining the matched plants (annual scope 1 and 2 values) to the weekly scope 2 table
## on the columns the two tables share
weekly_scope_1_and_2_long = pd.merge(scope_1_and_2_emissions_2021_prestep, scope2_plant_emissions_long)

## Scope 1 is only known as an annual figure, so it is spread evenly over 52 weeks
weekly_scope_1_and_2_long['scope1_tonnes_of_co2e_per_week'] = weekly_scope_1_and_2_long['scope1_co2e_tonnes_2021'] / 52

## Weekly total = evenly spread scope 1 + estimated weekly scope 2
## (the earlier .drop(columns=...) chained onto the scope 2 Series did nothing and was removed;
## the annual total columns are left out by the column selection below)
weekly_scope_1_and_2_long['total_weekly_co2e_tonnes'] = weekly_scope_1_and_2_long['scope1_tonnes_of_co2e_per_week'] + weekly_scope_1_and_2_long['scope2_co2e_tonnes_per_week']

## Column order of the long weekly scope 1 + 2 table
weekly_scope_1_and_2_long_columns = [
    "plant_id",
    "ghgrp_id",
    "plant_name",
    "lat",
    "lon",
    "address",
    "owner",
    "state",
    "plant_age_years",
    "municipality",
    "category_steel_product",
    "steel_products",
    "subregion",
    "subregion_name",
    "region",
    "week_end_date",
    "utilization",
    "co2e_tonnes_per_mwh",
    "estimated_emissions_intensity_tonne_per_tonne_scope2",
    "max_tonnes_of_steel_producible_annually",
    "max_tonnes_of_steel_producible_weekly",
    "scope2_co2e_tonnes_per_week",
    "scope1_tonnes_of_co2e_per_week",
    "total_weekly_co2e_tonnes",
]
weekly_scope_1_and_2_long = weekly_scope_1_and_2_long[weekly_scope_1_and_2_long_columns]

## Wide version: one column per week holding the weekly total
## The row identifiers are all long-table columns except the four that vary by week
weekly_scope_1_and_2_week_varying = (
    "week_end_date",
    "utilization",
    "scope2_co2e_tonnes_per_week",
    "total_weekly_co2e_tonnes",
)
weekly_scope_1_and_2_wide_index = [
    column for column in weekly_scope_1_and_2_long_columns
    if column not in weekly_scope_1_and_2_week_varying
]

weekly_scope_1_and_2_wide = weekly_scope_1_and_2_long.drop(columns = ['utilization', 'scope2_co2e_tonnes_per_week']).pivot(
    index = weekly_scope_1_and_2_wide_index
    , columns = 'week_end_date'
    , values = 'total_weekly_co2e_tonnes'
).reset_index(drop=False)

  uniques = Index(uniques)


### Finding the amount of steel produced weekly and annually (_long and _wide denote datasets with weekly values)

In [20]:
### Weekly plant productions long
## Starting from the weekly scope 2 table without its emissions column
plant_productions_long = scope2_plant_emissions_long.drop(columns = ['scope2_co2e_tonnes_per_week'])

## Estimated weekly production = regional utilization * weekly capacity of the plant
plant_productions_long['tonnes_of_steel_produced'] = plant_productions_long['utilization'] * plant_productions_long['max_tonnes_of_steel_producible_weekly']

## Rounding the data
plant_productions_long_rounded = plant_productions_long.round(3)

## Columns that describe a plant and stay constant across its weekly rows
plant_descriptor_columns = ['plant_id', 'plant_name', 'owner', 'lat', 'lon', 'state',
       'plant_age_years', 'municipality', 'address', 'category_steel_product',
       'steel_products', 'subregion', 'subregion_name', 'co2e_tonnes_per_mwh',
       'region',
       'estimated_emissions_intensity_tonne_per_tonne_scope2',
       'max_tonnes_of_steel_producible_annually',
       'max_tonnes_of_steel_producible_weekly']

## Amount of steel produced per week (one column per week)
plant_productions_wide = plant_productions_long_rounded.drop(columns = ['utilization']).pivot(
    index = plant_descriptor_columns
    , columns = 'week_end_date'
    , values = 'tonnes_of_steel_produced'
).reset_index(drop=False)

### 2021 Year ----
## Amount of steel produced in 2021
plant_productions_2021 = plant_productions_long_rounded.drop(columns = ['week_end_date', 'utilization'])

## dropna=False keeps a plant in the annual table even when one of its descriptor columns
## is missing; by default groupby leaves out every row whose group key contains NA
plant_productions_2021 = (
    plant_productions_2021
    .groupby(plant_descriptor_columns, dropna = False)
    .sum()
    .reset_index()
    .rename(columns = {'tonnes_of_steel_produced':'total_tonnes_steel_produced_2021'})
)

  uniques = Index(uniques)


In [21]:
## Joining annual production with annual scope 1 and 2 emissions on their shared columns
## (left join: every plant of the production table is kept)
annual_plant_summary = pd.merge(plant_productions_2021, scope_1_and_2_emissions_2021, how = 'left')

## Intensities are tonnes of CO2e per tonne of steel estimated to have been produced in 2021
steel_produced_2021 = annual_plant_summary['total_tonnes_steel_produced_2021']

annual_plant_summary['emissions_intensity_co2e_tonne_per_tonne'] = (
    annual_plant_summary['total_co2e_tonnes_2021'] / steel_produced_2021
)
annual_plant_summary['estimated_emissions_intensity_tonne_per_tonne_scope1'] = (
    annual_plant_summary['scope1_co2e_tonnes_2021'] / steel_produced_2021
)

## Column order of the final annual table
annual_summary_columns = [
    "plant_id",
    "ghgrp_id",
    "plant_name",
    "owner",
    "lat",
    "lon",
    "state",
    "plant_age_years",
    "municipality",
    "address",
    "category_steel_product",
    "steel_products",
    "subregion",
    "subregion_name",
    "co2e_tonnes_per_mwh",
    "region",
    "estimated_emissions_intensity_tonne_per_tonne_scope1",
    "estimated_emissions_intensity_tonne_per_tonne_scope2",
    "emissions_intensity_co2e_tonne_per_tonne",
    "max_tonnes_of_steel_producible_annually",
    "max_tonnes_of_steel_producible_weekly",
    "scope1_co2e_tonnes_2021",
    "scope2_co2e_tonnes_2021",
    "total_co2e_tonnes_2021",
    "total_tonnes_steel_produced_2021",
]

total_steel_plant_emissions_and_productions_2021 = annual_plant_summary[annual_summary_columns].round(3)

## Downloading Data

### Weekly scope 1 and 2 co2e wide

In [None]:
## Writing the wide weekly scope 1 + 2 table as CSV, without the row index
weekly_scope_1_and_2_wide_csv = 'output-data/weekly_scope1_scope2_steel_plant_emissions_2021.csv'
weekly_scope_1_and_2_wide.to_csv(weekly_scope_1_and_2_wide_csv, index=False)

### Weekly scope 1 and 2 co2e long

In [None]:
## Writing the long weekly scope 1 + 2 table as CSV, without the row index
weekly_scope_1_and_2_long_csv = 'output-data/weekly_scope1_scope2_steel_plant_emissions_2021_long.csv'
weekly_scope_1_and_2_long.to_csv(weekly_scope_1_and_2_long_csv, index=False)

### Weekly scope 2 co2e wide

In [None]:
## Writing the wide weekly scope 2 table as CSV, without the row index
weekly_scope2_wide_csv = 'output-data/weekly_scope2_plant_emissions_2021.csv'
weekly_scope2_plant_emissions_wide.to_csv(weekly_scope2_wide_csv, index=False)

### Weekly scope 2 co2e long

In [65]:
## Writing the long weekly scope 2 table as CSV, without the row index
weekly_scope2_long_csv = 'output-data/weekly_scope2_plant_emissions_2021_long.csv'
weekly_scope2_plant_emissions_long.to_csv(weekly_scope2_long_csv, index=False)

### Total annual productions and scope 1 and 2 co2e emissions

In [None]:
## Writing the annual production and emissions table as CSV, without the row index
annual_summary_csv = 'output-data/2021_steel_plant_emissions_and_productions.csv'
total_steel_plant_emissions_and_productions_2021.to_csv(annual_summary_csv, index=False)

### Weekly steel plant productions wide

In [None]:
## Writing the wide weekly production table as CSV, without the row index
plant_productions_wide_csv = 'output-data/weekly_steel_production.csv'
plant_productions_wide.to_csv(plant_productions_wide_csv, index=False)

### Weekly steel plant productions long

In [None]:
## Writing the long weekly production table as CSV, without the row index
plant_productions_long_csv = 'output-data/weekly_steel_production_long.csv'
plant_productions_long.to_csv(plant_productions_long_csv, index=False)