In [None]:
import os
import re

import camelot
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
# Typeset all figure text with LaTeX using a Computer Modern serif face
params = {
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": ["Computer Modern Serif"],
}
plt.rcParams.update(params)

# Lookup from sector code to the full sector name used as a display label
sector_dict = dict(
    ovr=r"\textbf{Overall ECF}",
    ag="Agriculture",
    cn="Construction",
    mf="Manufacturing",
    mn_rest="Mining (excl. fossil fuel extraction)",
    pwr="Fossil fuel power generation",
    og="Oil & gas extraction",
    coal="Coal mining",
    comm="Commercial",
)

# Lookup from emission-scope code to its display label
scope_dict = {f"scope{n}": f"Scope {n}" for n in (1, 2, 3)}

# 1 Read in ECF & demographic data

In [None]:
# Load the county-level ECF totals, keeping the FIPS / RUCC identifier columns
# as strings, and give the two footprint columns short names (ECF, ECF_log10).
final_total = (
    pd.read_csv(
        '../../overallFootprintCalc/Output/ECF_total.csv',
        index_col=0,
        dtype={'FIPS': str, 'FIPSTATE': str, 'RUCC_2013': str},
    )
    .rename(
        columns={
            'tonneCO2e_eff_peremp_avg': 'ECF',
            'tonneCO2e_eff_peremp_avg_log10': 'ECF_log10',
        }
    )
)

# 2 Compare IRA vs non-IRA counties

## 2.1 Updated treasury estimates

In [None]:
# Read in fossil fuel employment MSA/non-MSAs (at the county-level)
ffemp_counties = pd.read_excel(
    '../Input/TreasuryData/msa_nmsa_fee_ec_status_2023v2/MSA_NonMSA_EnergyCommunities_FossilFuelEmp_2023v2.xlsx',
    sheet_name='MSA_NMSA_FFE_EC',
    dtype={'geoid_county_2020': str, 'msa_area_id': str}
)
ffemp_counties = ffemp_counties.rename(
    columns={'geoid_county_2020': 'FIPS',
             'msa_area_id': 'MSA',
             'ec_qual_status': 'ec_ffe_status'}
)
# Recode the Yes/No status columns as 1/0 so they can be compared numerically
ffemp_counties = ffemp_counties.replace(['Yes', 'No'], [1, 0])
# The state FIPS code is the first two characters of the county FIPS code
ffemp_counties['FIPSTATE'] = ffemp_counties.FIPS.str[:2]
ffemp_counties = ffemp_counties.drop(columns=['fipstate_2020', 'fipscounty_2020', 'AFFGEOID_County_2020'])

# NOTE(review): `path` is not needed by this cell any more (the read below uses
# a relative path, like the read above). It is kept only in case later cells
# still reference it -- remove once confirmed unused.
path = os.getcwd()

# Read in coal closure tracts. The path is relative to the working directory,
# exactly like the fossil-fuel-employment read above, instead of slicing a fixed
# number of characters off the absolute working-directory path.
coal_counties = pd.read_excel(
    '../Input/TreasuryData/ira_coal_closure_energy_comm_2023v2/IRA_EnergyCommunities_CensusTracts_CoalClosures_2023v2.xlsx',
    sheet_name='Coal Closures',
    dtype={'geoid_county_2020': str, 'geoid_tract_2020': str}
)
coal_counties = coal_counties.rename(columns={'geoid_county_2020': 'FIPS'})
# Restore a leading zero dropped from county codes so every FIPS has 5 characters
coal_counties['FIPS'] = coal_counties.FIPS.str.zfill(5)
# Collapse the tract-level rows to one row per county
coal_counties = coal_counties.groupby(by=['FIPS'],
                                      as_index=False).sum()

# Counties whose MSA/non-MSA qualifies under the fossil fuel employment criterion
ffe_qualifying = ffemp_counties[ffemp_counties.ec_ffe_status == 1]

# Merge these files together: a county appears in `ira_counties` if it qualifies
# through fossil fuel employment and/or contains a coal closure tract
ira_counties = ffe_qualifying.merge(
    coal_counties[['FIPS']],
    how='outer',
    on='FIPS'
)

# Create dataframe with ECF values and classify each row as qualifying vs non-qualifying.
# `isin` performs one set-membership pass per flag rather than scanning the
# full list of qualifying counties once for every row.
ira_counties_ecf = final_total.copy()
ira_counties_ecf['ec'] = ira_counties_ecf.FIPS.isin(ira_counties.FIPS).astype(int)
ira_counties_ecf['ec_ffe'] = ira_counties_ecf.FIPS.isin(ffe_qualifying.FIPS).astype(int)
ira_counties_ecf['ec_coal'] = ira_counties_ecf.FIPS.isin(coal_counties.FIPS).astype(int)
ira_counties_ecf.to_csv('../Temp/ira_counties_ecf.csv')

In [None]:
# Overlay ECF histograms (log-scaled x axis, bar heights as proportions) for
# qualifying vs non-qualifying counties
fig, ax = plt.subplots(figsize=(7, 4))

energy_comm_labels = ['Non-qualifying counties', 'Qualifying counties']
const_binwidth = 0.09

# Qualifying counties (drawn first) are selected on the fossil-fuel-employment
# flag `ec_ffe`; non-qualifying counties are selected on the overall flag `ec`.
for i, flag_column in [(1, 'ec_ffe'), (0, 'ec')]:
    selected = ira_counties_ecf[flag_column] == i
    sns.histplot(
        data=ira_counties_ecf.loc[selected, 'ECF'],
        log_scale=True,
        label=energy_comm_labels[i],
        ax=ax,
        stat='proportion',
        alpha=0.6,
        binwidth=const_binwidth,
    )

ax.legend(fontsize=11)
ax.set_xlabel('Employment carbon footprint (tonnes CO$_{2}$e per employee)', fontsize=12)
ax.set_ylabel('Proportion', fontsize=12)
fig.suptitle('ECF distributions for counties with and\nwithout qualifying IRA energy communities\n(fossil-fuel employment communities only)', fontsize=16)
fig.tight_layout()
fig.savefig('../Figures/ira_ecf_hist_ffemponly.pdf', bbox_inches='tight', dpi=200)
plt.show()

In [None]:
# Count counties in the tails of the ECF distribution, split by energy
# community status.
#
# `ira_counties_ecf` is a copy of `final_total`, whose per-employee footprint
# column was renamed to `ECF` on load. Both the values being filtered and the
# quantile thresholds are therefore taken from `ECF`, so the comparison is
# always a column against its own quantiles.
ecf_values = ira_counties_ecf.ECF
is_non_ec = ira_counties_ecf.ec == 0          # no energy community of any kind
is_ffe_ec = ira_counties_ecf.ec_ffe == 1      # qualifies via fossil fuel employment

print("number of 90th percentile, non-IRA counties:",
      len(ira_counties_ecf[(ecf_values > ecf_values.quantile(q=0.9)) & is_non_ec]))
print("number of 10th percentile, IRA counties:",
      len(ira_counties_ecf[(ecf_values < ecf_values.quantile(q=0.1)) & is_ffe_ec]))
print("number of 20th percentile, IRA counties:",
      len(ira_counties_ecf[(ecf_values < ecf_values.quantile(q=0.2)) & is_ffe_ec]))

# 3 Compare FFE eligibility using 2021 and 2022 unemployment rates

In [None]:
# Read in Local Area Unemployment Statistics (LAUS) at the county-level for 2021 and 2022
# These statistics are used by Treasury to calculate the MSA/non-MSA unemployment rate,
# according to https://energycommunities.gov/energy-community-tax-credit-bonus-faqs/
laus = {}
for year in [21, 22]:
    laus_raw = pd.read_excel(
        io=f'../Input/LAUS/laucnty{year}.xlsx',
        header=5,
        usecols='A:E,G:J',
        names=['laus_code', 'state_fips', 'county_fips', 'County', 'year',
               'labor_force', 'employed', 'unemployed', 'unemp_rate'],
        dtype={'state_fips': str, 'county_fips': str, 'year': str}
    )
    laus[year] = (
        laus_raw
        # County FIPS = state code followed by county code
        .assign(FIPS=laus_raw.state_fips + laus_raw.county_fips)
        .drop(columns=['state_fips', 'county_fips'])
        # Discard the last three rows of the sheet (presumably footer notes -- TODO confirm)
        .iloc[:-3]
    )

# Read in county-MSA/non-MSA crosswalk used by Treasury for EC analysis, from Appendix A of IRS
# Notice 2023-29 (https://www.irs.gov/pub/irs-drop/n-23-29-appendix-a.pdf). More details on
# how the MSA/non-MSAs were defined is available at https://energycommunities.gov/energy-community-tax-credit-bonus-faqs/
table_lst = camelot.read_pdf("../Input/TreasuryData/n-23-29-appendix-a.pdf", pages='all')
# Stack the DataFrame of every extracted table into a single frame
county_msa_crosswalk_lst = [tbl.df for tbl in table_lst]
county_msa_crosswalk_raw = pd.concat(county_msa_crosswalk_lst)
county_msa_crosswalk_raw.columns = ['FIPSTATE', 'county_code', 'State_Name', 'County', 'MSA_code', 'msa_name']

# Strip embedded newlines, then drop the first row
# (presumably the table's header row -- TODO confirm)
county_msa_crosswalk = (
    county_msa_crosswalk_raw
    .replace(r"\n", "", regex=True)
    .iloc[1:, :]
    .reset_index(drop=True)
)
county_msa_crosswalk['FIPS'] = county_msa_crosswalk.FIPSTATE + county_msa_crosswalk.county_code


def _msa_code_for_row(row):
    """Return the row's MSA code, falling back to the digits that start `msa_name`."""
    if row.MSA_code == '':
        return re.match(r'^(\d+)', row.msa_name).group(1)
    return row.MSA_code


county_msa_crosswalk['MSA'] = county_msa_crosswalk.apply(_msa_code_for_row, axis=1)
# Keep only the part of the code before the first space
county_msa_crosswalk['MSA'] = county_msa_crosswalk['MSA'].str.replace(r' .*', '', regex=True)
county_msa_crosswalk = county_msa_crosswalk.drop(columns='MSA_code')

# Merge MSA/non-MSA code onto each set of unemployment statistics
for year, laus_frame in list(laus.items()):
    laus[year] = laus_frame.merge(
        county_msa_crosswalk[['FIPS', 'FIPSTATE', 'MSA']],
        how='left',
        on='FIPS'
    )

In [None]:
# Aggregate total labor force and number of unemployed individuals to the MSA/non-MSA level,
# calculate the aggregate unemployment rate for each MSA/non-MSA as well as national rate,
# and identify areas with unemployment higher than the national rate
laus_msa = {}
US_unemp_rate = {}
for year in [21, 22]:
    county_stats = laus[year]
    US_unemp_rate[year] = (
        county_stats.unemployed.sum() / county_stats.labor_force.sum() * 100
    )

    msa_stats = county_stats.groupby(by=['MSA'], as_index=False).sum()
    # Recompute the rate from the summed counts (the summed county rates are overwritten)
    msa_stats['unemp_rate'] = msa_stats.unemployed / msa_stats.labor_force * 100
    # 1 where the area's rate is strictly above the national rate, else 0
    msa_stats['unemp_qual_status'] = (
        msa_stats.unemp_rate > US_unemp_rate[year]
    ).astype('int64')
    laus_msa[year] = msa_stats

# Create dataframe of whether ff employment test is passed, by MSA
ffemp_msa = ffemp_counties.drop_duplicates(subset=['MSA'])

# Treasury's statuses per MSA; its EC status is tagged as OFFICIAL so that it can
# sit beside the status calculated below
treasury_status = ffemp_msa[['MSA', 'ffe_qual_status', 'ec_ffe_status']].rename(
    columns={'ec_ffe_status': 'ec_ffe_status_OFFICIAL'}
)

# Merge FFE qualifying status from treasury data onto these dataframes, and determine
# overall EC qualification status
ffe_msa = {}
for year in [21, 22]:
    msa_status = laus_msa[year].merge(treasury_status, how='left', on='MSA').fillna(0)
    # An area qualifies only if it passes both the unemployment and the FFE test
    msa_status['ec_ffe_status'] = (
        (msa_status.unemp_qual_status == 1) & (msa_status.ffe_qual_status == 1)
    ).astype('int64')
    ffe_msa[year] = msa_status

# For 2022 figures, need to make sure that our calculated EC status based on FFE criterion matches
# that which the treasury actually determined (ie that in ffemp_counties and ffe_msa). Compare
# the two, and overwrite any discrepancies
status_differs = ffe_msa[22].ec_ffe_status_OFFICIAL != ffe_msa[22].ec_ffe_status
print("no. MSAs with different EC qualification between our calculations and Treasury's:",
      len(ffe_msa[22][status_differs])
      )
ffe_msa[22] = ffe_msa[22].drop(columns='ec_ffe_status').rename(
    columns={'ec_ffe_status_OFFICIAL': 'ec_ffe_status'})

# Create new dataframe with 2021 and 2022 EC eligibility; the three status columns
# shared by both years receive a _21 / _22 suffix
status_columns = ['MSA', 'unemp_qual_status', 'ffe_qual_status', 'ec_ffe_status']
ec_ffe_msa = ffe_msa[21][status_columns].merge(
    ffe_msa[22][status_columns],
    how='left',
    on='MSA',
    suffixes=('_21', '_22')
)

# Attach the MSA-level statuses to every county and drop rows with any missing value
ec_ffe_counties = county_msa_crosswalk[['FIPS', 'MSA']].merge(
    ec_ffe_msa,
    how='left',
    on='MSA'
)
ec_ffe_counties = ec_ffe_counties.dropna()

ec_ffe_counties.to_csv('../Temp/ec_ffe_counties.csv')

# 4 Analyze most carbon-intensive non-qualifying counties

In [None]:
# Isolate 100 counties with highest ECFs that do not contain ECs
non_ec_counties = ira_counties_ecf[ira_counties_ecf.ec == 0]
ira_counties_ecf_top100 = non_ec_counties.sort_values(
    by='ECF', ascending=False).head(100)

# Read in sectoral ECF data and isolate counties identified above
ECF_sector = pd.read_csv(
    '../../overallFootprintCalc/Output/ECF_sector.csv',
    index_col=0,
    dtype={'FIPS': str, 'FIPSTATE': str},
)
in_top100 = ECF_sector.FIPS.isin(ira_counties_ecf_top100.FIPS.unique())
ECF_sector_top100 = ECF_sector[in_top100].reset_index(drop=True)

# Create pivot table of CO2 emissions: one row per county, one column per sector.
# Missing values are swapped for the placeholder 'NA' before pivoting and turned
# back into NaN afterwards; sectors absent for a county are filled with 0.
ECF_sector_top100_pivot = (
    ECF_sector_top100
    .fillna('NA')
    .pivot_table(
        columns='sector',
        values='tonCO2e_eff_avg',
        index=['FIPS', 'County', 'State'],
    )
    .fillna(0)
    .reset_index()
    .replace('NA', np.nan)
)

# Columns from position 3 onwards are the per-sector values
# (positions 0-2 hold FIPS, County and State)
ECF_sector_top100_pivot['TOTAL'] = ECF_sector_top100_pivot.iloc[:, 3:].sum(
    axis=1)

sums = ECF_sector_top100_pivot.iloc[:, 3:].sum()

# Calculate the proportion of emissions from each sector
sector_codes = list(ECF_sector_top100.sector.unique())
for sector in sector_codes:
    sector_share = ECF_sector_top100_pivot[sector] / ECF_sector_top100_pivot.TOTAL
    ECF_sector_top100_pivot['prop_' + sector] = sector_share
ECF_sector_top100_pivot = ECF_sector_top100_pivot.merge(
    ira_counties_ecf[['FIPS', 'ECF', 'ECF_log10']],
    how='left',
    on='FIPS'
)

# Identify sector with greatest share of emissions for each county
ECF_sector_top100_pivot['biggest_sector'] = (
    ECF_sector_top100_pivot[sector_codes].idxmax(axis="columns")
)


def _majority_sector(row):
    """Return the row's largest sector if its share exceeds 0.5, otherwise "NA"."""
    if row["prop_" + row.biggest_sector] > 0.5:
        return row.biggest_sector
    return "NA"


def _sector_display_name(code):
    """Map a sector code (or the "NA" placeholder) to its full display name."""
    if code == 'NA':
        return 'NA (no majority sector)'
    return sector_dict[code]


ECF_sector_top100_pivot['biggest_sector'] = ECF_sector_top100_pivot.apply(
    _majority_sector, axis=1)
ECF_sector_top100_pivot['biggest_sector_full'] = (
    ECF_sector_top100_pivot['biggest_sector'].apply(_sector_display_name)
)
ECF_sector_top100_pivot.to_csv('../Temp/ECF_sector_top100_pivot.csv')