In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
from matplotlib import ticker as mticker
%matplotlib inline


# 1 Read in necessary data

In [33]:
# Load the demographic, IRA-classification and sectoral ECF tables, then
# attach the IRA columns to the county-level demographic table.

# FIPS identifiers are parsed as text in all three tables.
fips_as_text = {'FIPS': str, 'FIPSTATE': str}

totalECF_demo = pd.read_csv('totalECF_demo.csv', index_col=0, dtype=fips_as_text)
ira_counties_ecf = pd.read_csv('ira_counties_ecf.csv', index_col=0, dtype=fips_as_text)

ECF_sector = pd.read_csv(
    'https://raw.githubusercontent.com/kailingraham/GrahamKnittel_EmploymentCarbonFootprints_Data/main/ECF_sector.csv',
    index_col=0,
    dtype=fips_as_text
).rename(columns={
    'tonneCO2e_eff_peremp_avg': 'ECF',
    'tonneCO2e_eff_peremp_avg_log10': 'ECF_log10',
})
# Derive the 'tonne' column from the 'ton' column.
# NOTE(review): 0.907185 looks like the short-ton -> metric-tonne factor;
# confirm against the data documentation.
ECF_sector['tonneCO2e_eff_avg'] = ECF_sector['tonCO2e_eff_avg'] * 0.907185

# Bring over only the IRA columns that the demographic table does not already
# have (original column order kept), plus the FIPS join key.
is_new_column = ~ira_counties_ecf.columns.isin(totalECF_demo.columns)
ira_columns_to_add = [*ira_counties_ecf.columns[is_new_column], 'FIPS']
totalECF_demo = totalECF_demo.merge(
    ira_counties_ecf[ira_columns_to_add],
    on='FIPS',
    how='left'
)

# 2 Prepare mapping dataframe


In [3]:
# Load in a GeoJSON file containing the geometry information for US counties, where feature.id is a FIPS code.
from urllib.request import urlopen
import json
import geopandas

# The county geometry file is read twice below (as a raw GeoJSON dict and as a
# GeoDataFrame), so the address is spelled out once and reused.
counties_geojson_url = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"

# Raw GeoJSON parsed into plain Python objects
with urlopen(counties_geojson_url) as geojson_response:
    counties = json.load(geojson_response)

# Geodataframe built from the same file
counties_gdf = geopandas.read_file(counties_geojson_url)

In [4]:
# Build the mapping dataframe: select the needed county columns, add a z-score
# of log10(ECF), rank the race-share columns per county, attach the county
# geometries and write the result to disk.
from shapely.geometry.collection import GeometryCollection

# County-level columns carried into the mapping dataframe.
TOOL_COLUMNS = [
    'FIPS', 'County', 'FIPSTATE', 'State',
    'ECF', 'ECF_log10',
    'Emp', 'POP', 'POP_log10', 'MIG_TOT', 'MIG_TOT_log10',
    'MIG_PERCENT', 'UNEMP_RATE', 'POV_RATE', 'INC_IND_TOT',
    'ED_PERCENT_TERTIARY', 'pop_per_sqmi', 'pop_per_sqmi_log10',
    'county_preferred_party', 'RUCC_2013',
    'county_percent_D', 'county_percent_R', 'ETHN_LATIN',
    'ETHN_LATIN_PERCENT', 'RACE_BLACK_PERCENT', 'RACE_NATAMERICAN_PERCENT', 'RACE_ASIAN_PERCENT',
    'RACE_HAWAII_PACISLAND_PERCENT', 'RACE_OTHER_PERCENT', 'RACE_PERCENT_MINORITY',
    'RACE_PERCENT_MINORITY_log10', 'ec', 'ec_ffe', 'ec_coal',
]

# Source column -> display label for the race shares that are ranked below.
RACE_LABELS = {
    'RACE_BLACK_PERCENT': 'Black',
    'RACE_NATAMERICAN_PERCENT': 'Native American',
    'RACE_ASIAN_PERCENT': 'Asian',
    'RACE_HAWAII_PACISLAND_PERCENT': 'Hawaiian/Pac. Islander',
    'RACE_OTHER_PERCENT': 'Other',
}


def _idxmax_or_nan(frame):
    """Return, per row, the label of the column holding the row maximum.

    Rows without any non-missing value get NaN. They are excluded from the
    ``idxmax`` call itself because recent pandas versions deprecate calling
    ``idxmax`` on all-NA rows.
    """
    labels = pd.Series(np.nan, index=frame.index, dtype=object)
    has_value = frame.notna().any(axis='columns').to_numpy()
    if has_value.any():
        labels.loc[has_value] = frame.loc[has_value].idxmax(axis='columns').to_numpy()
    return labels


# .copy() makes this an independent frame, so the column assignments below do
# not raise pandas' SettingWithCopyWarning.
totalECF_demo_tool = totalECF_demo[TOOL_COLUMNS].copy()

# Standardise log10(ECF) across counties (Series.std() uses the sample std).
mean, std = totalECF_demo_tool.ECF_log10.mean(), totalECF_demo_tool.ECF_log10.std()
totalECF_demo_tool['ECF_log10_zscore'] = (totalECF_demo_tool.ECF_log10 - mean) / std

totalECF_demo_tool = totalECF_demo_tool.rename(columns=RACE_LABELS)

# Determine and calculate pop shares of top and second top races per county
race_shares = totalECF_demo_tool[list(RACE_LABELS.values())].astype(float)
top_race = _idxmax_or_nan(race_shares)
totalECF_demo_tool['top_race'] = top_race
totalECF_demo_tool['top_race_percent'] = race_shares.max(axis='columns')

# Blank out each county's top race; the row maximum of what is left is the
# runner-up. Ties resolve to the first column in RACE_LABELS order.
is_top_race = pd.DataFrame({race: top_race.eq(race) for race in race_shares.columns})
runner_up_shares = race_shares.mask(is_top_race)
totalECF_demo_tool['next_top_race'] = _idxmax_or_nan(runner_up_shares)
totalECF_demo_tool['next_top_race_percent'] = runner_up_shares.max(axis='columns')

# Create geodataframe with totalECF_demo data and county geometries.
# how='right' keeps every county of the data table, even without a geometry.
totalECF_demo_tool = counties_gdf.rename(
    columns={'id': 'FIPS'}
)[['FIPS', 'CENSUSAREA', 'geometry']].merge(totalECF_demo_tool, how='right', on='FIPS')

# Deal with empty geometry fields: substitute an empty GeometryCollection
totalECF_demo_tool['geometry'] = totalECF_demo_tool.geometry.apply(
    lambda x: x if x else GeometryCollection())

# Save to geojson
totalECF_demo_tool.to_file('totalECF_demo_tool.geojson', driver='GeoJSON')
# NOTE(review): no driver is given here, so the output format is whatever the
# installed geopandas chooses for a '.json' path — confirm it is GeoJSON.
totalECF_demo_tool.to_file('totalECF_demo_tool.json')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  totalECF_demo_tool['ECF_log10_zscore'] = totalECF_demo_tool.ECF_log10.apply(


# 3 Prepare socioeconomic dataframe

In [39]:
# Pivot the sectoral table to one row per county with an EMP_<sector> and a
# TONNECO2E_<sector> column per sector. pivot_table's default aggregation
# (mean) applies if a county/sector pair occurs more than once.
sector_pivot = ECF_sector.pivot_table(
    index='FIPS',
    columns='sector',
    values=['Emp', 'tonneCO2e_eff_avg']
)

# Build the flat frame directly from the pivot instead of adding flat columns
# to a MultiIndex-column frame and dropping levels afterwards; the resulting
# columns, their order and their values are the same.
socioec_ECF_sectoral = pd.DataFrame({'FIPS': sector_pivot.index.to_numpy()})
for sector in ECF_sector.sector.unique():
    socioec_ECF_sectoral[f'EMP_{sector}'] = sector_pivot[('Emp', sector)].to_numpy()
    socioec_ECF_sectoral[f'TONNECO2E_{sector}'] = sector_pivot[('tonneCO2e_eff_avg', sector)].to_numpy()
socioec_ECF_sectoral

  socioec_ECF_sectoral = socioec_ECF_sectoral.drop(


Unnamed: 0,FIPS,EMP_ag,TONNECO2E_ag,EMP_cn,TONNECO2E_cn,EMP_comm,TONNECO2E_comm,EMP_mf,TONNECO2E_mf,EMP_mn_rest,TONNECO2E_mn_rest,EMP_pwr,TONNECO2E_pwr,EMP_og,TONNECO2E_og,EMP_coal,TONNECO2E_coal
0,01001,110.25,5625.087690,522.00,7586.227945,7770.750000,35154.875731,1527.25,783299.535970,74.75,2.427040e+03,147.678138,5.946155e+05,,,,
1,01003,682.00,15358.163939,3929.25,45476.762682,53394.750000,314200.544449,3937.25,210100.722467,72.75,3.159219e+03,,,,6.613737e+04,,
2,01005,174.50,11523.689452,140.75,3290.463141,3586.083333,20826.910720,2390.25,112314.165031,154.25,1.114503e+05,,,,,,
3,01007,116.00,3234.842611,760.25,2733.956634,2170.750000,3407.383599,328.50,1049.815974,,2.131786e+03,,,,,,
4,01009,64.00,35464.069092,484.25,8909.227112,5230.750000,33491.091061,1189.50,24994.521341,,9.587582e+02,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3134,56037,7.50,5139.528477,1633.50,12347.169382,10670.250000,18909.008358,1387.75,809954.007288,3647.00,5.903716e+06,275.138985,2.606519e+06,428.50,1.201701e+06,511.0,245175.470175
3135,56039,84.75,2913.675356,2132.75,14874.864980,15027.750000,18975.521308,146.75,3671.081531,,3.766434e+02,1.000000,2.722209e+01,3.00,2.641539e+03,,
3136,56041,29.50,8572.290137,778.25,7458.387607,4977.500000,13461.484843,277.25,114586.493815,295.75,3.700142e+03,,,46.75,5.644267e+05,,3566.905399
3137,56043,120.00,12350.617024,245.25,4926.875907,1518.250000,3383.632788,417.75,49711.810500,59.00,1.356835e+04,,,26.25,3.377233e+04,,


In [41]:
# Filter out columns needed for the socioeconomic pop-up dashboard.
# .copy() makes socioec_ECF an independent frame, so the 'County' assignment
# below does not raise pandas' SettingWithCopyWarning.
socioec_ECF = totalECF_demo[
    ['FIPS', 'County', 'FIPSTATE', 'State',
     'tonneCO2e_eff_avg', 'ECF', 'ECF_log10',
     'POP', 'Emp', 'UNEMP_RATE', 'POV_RATE', 'INC_IND_TOT', 'ED_PERCENT_TERTIARY'
     ]
].copy()

# Rename & reformat columns
# NOTE(review): str.capitalize() lower-cases everything after the first
# character, so multi-word county names lose inner capitals — confirm intended.
socioec_ECF['County'] = socioec_ECF['County'].str.capitalize()
socioec_ECF = socioec_ECF.rename(
    columns={
        'County': 'COUNTY',
        'State': 'STATE',
        'tonneCO2e_eff_avg': 'TONNECO2E',
        'Emp': 'EMP_TOT'})

# Pivot sectoral data to give tonneCO2e and emp per sector for each county.
# The flat frame is built directly from the pivot rather than by adding flat
# columns to a MultiIndex-column frame and dropping levels afterwards.
sectors = ECF_sector.sector.unique()
sector_pivot = ECF_sector.pivot_table(
    index='FIPS',
    columns='sector',
    values=['Emp', 'tonneCO2e_eff_avg']
)
socioec_ECF_sectoral = pd.DataFrame({'FIPS': sector_pivot.index.to_numpy()})
for sector in sectors:
    socioec_ECF_sectoral[f'EMP_{sector}'] = sector_pivot[('Emp', sector)].to_numpy()
    socioec_ECF_sectoral[f'TONNECO2E_{sector}'] = sector_pivot[('tonneCO2e_eff_avg', sector)].to_numpy()

# Merge sectoral data onto total county data and convert to percentage shares
socioec_ECF = pd.merge(socioec_ECF, socioec_ECF_sectoral, how='left', on='FIPS')
raw_sector_columns = []
for sector in sectors:
    emp_col, co2_col = f'EMP_{sector}', f'TONNECO2E_{sector}'
    sector_emp, sector_co2 = socioec_ECF[emp_col], socioec_ECF[co2_col]
    # Employment becomes 0 only when neither employment nor emissions are
    # positive. A missing employment figure next to positive emissions stays
    # NaN, so its share is reported as NaN rather than as 0.
    sector_emp = sector_emp.where((sector_emp > 0) | (sector_co2 > 0), 0)
    socioec_ECF[f'EMP_{sector.upper()}_PERCENT'] = sector_emp / socioec_ECF['EMP_TOT'] * 100
    socioec_ECF[f'TONNECO2E_{sector.upper()}_PERCENT'] = \
        sector_co2.fillna(0) / socioec_ECF['TONNECO2E'] * 100
    raw_sector_columns += [emp_col, co2_col]
# Only the percentage columns are kept; the per-sector levels are dropped.
socioec_ECF = socioec_ECF.drop(columns=raw_sector_columns)

# Create state and national ECF figures (total emissions / total employment).
# Only the two columns entering the ratio are aggregated.
totalECF_state_demo = totalECF_demo[
    ['FIPSTATE', 'State', 'tonneCO2e_eff_avg', 'Emp']
].groupby(by=['FIPSTATE', 'State'], as_index=False).sum()
totalECF_state_demo['ECF_ST'] = totalECF_state_demo['tonneCO2e_eff_avg'] / \
    totalECF_state_demo['Emp']
totalECF_state_demo['ECF_ST_log10'] = np.log10(totalECF_state_demo['ECF_ST'])
# NOTE(review): mean/std are not used anywhere in this cell; they are kept
# only so that the notebook-level names keep the values they had before.
mean = np.mean(totalECF_state_demo.ECF_ST_log10)
std = np.std(totalECF_state_demo.ECF_ST_log10)
totalECF_state_demo = totalECF_state_demo[['FIPSTATE', 'ECF_ST', 'ECF_ST_log10']]

totalECF_US = totalECF_demo.tonneCO2e_eff_avg.sum() / totalECF_demo.Emp.sum()
totalECF_US_log10 = np.log10(totalECF_US)

# Merge onto total county data; the national figure is the same for every row
socioec_ECF = pd.merge(socioec_ECF, totalECF_state_demo, how='left', on='FIPSTATE')
socioec_ECF['ECF_US'] = totalECF_US
socioec_ECF['ECF_US_log10'] = totalECF_US_log10
socioec_ECF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  socioec_ECF['County'] = socioec_ECF['County'].str.capitalize()
  socioec_ECF_sectoral = socioec_ECF_sectoral.drop(


Unnamed: 0,FIPS,COUNTY,FIPSTATE,STATE,TONNECO2E,ECF,ECF_log10,POP,EMP_TOT,UNEMP_RATE,...,EMP_PWR_PERCENT,TONNECO2E_PWR_PERCENT,EMP_OG_PERCENT,TONNECO2E_OG_PERCENT,EMP_COAL_PERCENT,TONNECO2E_COAL_PERCENT,ECF_ST,ECF_ST_log10,ECF_US,ECF_US_log10
0,01001,Autauga,01,AL,1.428708e+06,116.455715,2.066161,55639.0,12268.25,1.322813,...,1.203742,41.619114,0.000000,0.000000,0.000000,0.000000,23.449232,1.370129,13.937224,1.144176
1,01003,Baldwin,01,AL,6.544326e+05,9.034288,0.955894,218289.0,72438.75,1.844802,...,0.000000,0.000000,,10.106063,0.000000,0.000000,23.449232,1.370129,13.937224,1.144176
2,01005,Barbour,01,AL,2.594055e+05,33.048442,1.519151,25026.0,7849.25,2.593303,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,23.449232,1.370129,13.937224,1.144176
3,01007,Bibb,01,AL,1.255778e+04,3.053564,0.484807,22374.0,4112.50,2.981139,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,23.449232,1.370129,13.937224,1.144176
4,01009,Blount,01,AL,1.038176e+05,12.043460,1.080751,57755.0,8620.25,2.169509,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,23.449232,1.370129,13.937224,1.144176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3134,56037,Sweetwater,56,WY,1.080346e+07,487.619650,2.688081,43352.0,22155.50,3.314726,...,1.241854,24.126715,1.934057,11.123297,2.306425,2.269417,153.189567,2.185229,13.937224,1.144176
3135,56039,Teton,56,WY,4.348053e+04,2.091416,0.320440,23356.0,20790.00,1.198835,...,0.004810,0.062608,0.014430,6.075223,0.000000,0.000000,153.189567,2.185229,13.937224,1.144176
3136,56041,Uinta,56,WY,7.157722e+05,86.547831,1.937256,20374.0,8270.25,2.223422,...,0.000000,0.000000,0.565279,78.855638,,0.498330,153.189567,2.185229,13.937224,1.144176
3137,56043,Washakie,56,WY,1.177136e+05,34.439314,1.537054,7933.0,3418.00,2.142947,...,0.000000,0.000000,0.767993,28.690257,0.000000,0.000000,153.189567,2.185229,13.937224,1.144176


In [42]:
# Persist the socioeconomic dashboard table as CSV in the working directory
socioec_ECF.to_csv(path_or_buf='socioec_ECF.csv')