!pip install usaddress

In [4]:
import pandas as pd
import numpy as np
import re
import usaddress

In [3]:
esg = pd.read_excel('data/processed/esg_clean.xlsx')
gdp = pd.read_excel('data/processed/gdp_clean.xlsx')
state_name_gdp = pd.read_excel('data/processed/state_name_gdp_clean.xlsx')
fin = pd.read_excel('data/processed/fin_clean.xlsx')

# 0. Understand the ESG scores

In [5]:
esg_scores = esg[['Total ESG Risk score', 'Environment Risk Score', 'Governance Risk Score', 'Social Risk Score']]
esg_scores.describe()

Unnamed: 0,Total ESG Risk score,Environment Risk Score,Governance Risk Score,Social Risk Score
count,432.0,432.0,432.0,432.0
mean,21.393519,5.675463,6.669676,9.023148
std,7.247546,5.345653,2.201841,3.790888
min,7.0,0.0,3.0,1.1
25%,16.0,1.5,5.0,6.6
50%,21.0,3.8,6.0,8.7
75%,26.0,8.925,7.7,11.525
max,46.0,25.0,15.5,21.0


# 1. Get location info from company's address

In [6]:
def get_state_name(address):
    state = None
    try:
        parsed_address = usaddress.parse(address)
        for component in parsed_address:
            if 'StateName' in component:
                state = component[0]
    except:
        print('company not in U.S')

    return state

In [7]:
# extract the state info from address
esg['state'] = esg['Address'].apply(get_state_name)

In [8]:
# in this esg dataset, 17 companies not based in U.S
len(esg[esg['state'].isnull()])

17

In [9]:
esg.head(5)

Unnamed: 0,Symbol,Name,Address,Sector,Industry,Full Time Employees,Description,Total ESG Risk score,Environment Risk Score,Governance Risk Score,Social Risk Score,Controversy Level,Controversy Score,ESG Risk Percentile,ESG Risk Level,state
0,A,Agilent Technologies Inc,"5301 Stevens Creek Boulevard\nSanta Clara, CA ...",Healthcare,Diagnostics & Research,18000,"Agilent Technologies, Inc. provides applicatio...",15,0.3,6.3,8.6,Low,1,11th percentile,Low,CA
1,AAL,American Airlines Group Inc,"1 Skyview Drive\nFort Worth, TX 76155\nUnited ...",Industrials,Airlines,132500,"American Airlines Group Inc., through its subs...",29,12.0,5.0,12.0,Moderate,2,62nd percentile,,TX
2,AAP,Advance Auto Parts Inc,"4200 Six Forks Road\nRaleigh, NC 27609\nUnited...",Consumer Cyclical,Specialty Retail,40000,"Advance Auto Parts, Inc. provides automotive r...",12,0.0,3.0,8.0,Moderate,2,4th percentile,Negligible,NC
3,AAPL,Apple Inc,"One Apple Park Way\nCupertino, CA 95014\nUnite...",Technology,Consumer Electronics,164000,"Apple Inc. designs, manufactures, and markets ...",17,0.6,9.2,6.9,Significant,3,15th percentile,Low,CA
4,ABBV,Abbvie Inc,"1 North Waukegan Road\nNorth Chicago, IL 60064...",Healthcare,Drug Manufacturers—General,50000,"AbbVie Inc. discovers, develops, manufactures,...",28,1.1,9.9,16.8,Significant,3,55th percentile,Medium,IL


# 1.  Geography & ESG

In [22]:
# calculate the average Total ESG risk score by state
groupby_geo = esg.groupby('state')['Total ESG Risk score'].agg('mean').sort_values(ascending=False).head(10)
groupby_geo_df = pd.DataFrame({'state_abbreviation':groupby_geo.index,
                               'average ESG scores':groupby_geo.values})
groupby_geo_df

Unnamed: 0,state_abbreviation,average ESG scores
0,LA,30.0
1,OK,29.0
2,DE,26.5
3,AZ,26.166667
4,AR,25.333333
5,TX,25.282051
6,MI,25.0
7,NE,25.0
8,WI,24.6
9,PA,24.533333


In [12]:
# merge with the state_name_gdp dataframe together for better analysis
gro_gdp_esg = pd.merge(state_name_gdp,groupby_geo_df,on='state_abbreviation')
gro_gdp_esg

Unnamed: 0,state,state_abbreviation,gdp_2022,gdp_2023,change_percentage,average ESG scores
0,Arizona,AZ,458950,479759,0.04534,26.166667
1,Arkansas,AR,165221,171152,0.035897,25.333333
2,Delaware,DE,87525,91581,0.046341,26.5
3,Louisiana,LA,281429,289945,0.03026,30.0
4,Michigan,MI,620696,645293,0.039628,25.0
5,Nebraska,NE,161702,170145,0.052213,25.0
6,Oklahoma,OK,240534,243350,0.011707,29.0
7,Pennsylvania,PA,923089,961946,0.042095,24.533333
8,Texas,TX,2355960,2436346,0.03412,25.282051
9,Wisconsin,WI,401792,417301,0.0386,24.6


# 2.  Sector/Industry & ESG

In [13]:
len(esg['Industry'].unique())

108

In [14]:
# calculate the average Total ESG Risk Score by sector
esg.groupby('Sector')['Total ESG Risk score'].agg('mean').sort_values(ascending=False)

Sector
Energy                    33.150000
Utilities                 27.750000
Basic Materials           27.421053
Consumer Defensive        24.382353
Industrials               23.724138
Healthcare                20.924528
Financial Services        20.557377
Communication Services    19.500000
Consumer Cyclical         18.796296
Technology                16.854839
Real Estate               13.103448
Name: Total ESG Risk score, dtype: float64

In [15]:
# calculate the average Environment Risk Score by sector
esg.groupby('Sector')['Environment Risk Score'].agg('mean').sort_values(ascending=False)

Sector
Energy                    17.180000
Basic Materials           12.952632
Utilities                 12.660714
Consumer Defensive         8.385294
Industrials                6.929310
Consumer Cyclical          5.016667
Technology                 3.867742
Real Estate                3.765517
Healthcare                 1.803774
Communication Services     1.792857
Financial Services         1.311475
Name: Environment Risk Score, dtype: float64

In [16]:
# calculate the average Governance Risk Score by sector
esg.groupby('Sector')['Governance Risk Score'].agg('mean').sort_values(ascending=False)

Sector
Financial Services        9.852459
Healthcare                7.552830
Communication Services    7.464286
Basic Materials           6.931579
Energy                    6.565000
Industrials               6.082759
Utilities                 5.760714
Technology                5.751613
Consumer Defensive        5.697059
Real Estate               5.417241
Consumer Cyclical         5.388889
Name: Governance Risk Score, dtype: float64

In [17]:
# calculate the average Social Risk Score by sector
esg.groupby('Sector')['Social Risk Score'].agg('mean').sort_values(ascending=False)

Sector
Healthcare                11.562264
Industrials               10.696552
Communication Services    10.335714
Consumer Defensive        10.182353
Financial Services         9.485246
Energy                     9.310000
Utilities                  9.260714
Consumer Cyclical          8.344444
Basic Materials            7.542105
Technology                 7.191935
Real Estate                3.793103
Name: Social Risk Score, dtype: float64

In [18]:
# calculate the average Total ESG Risk Score by industry
# only select industries within the top 3 sector with highest average Total ESG Risk Score
esg[esg['Sector'].isin(['Real Estate', 'Technology', 'Consumer Defensive'])].groupby('Industry')['Total ESG Risk score'].mean().sort_values(ascending=False) 

Industry
Farm Products                          35.666667
Packaged Foods                         28.500000
Beverages—Wineries & Distilleries      26.000000
Confectioners                          25.500000
Tobacco                                24.000000
Beverages—Non-Alcoholic                22.333333
Semiconductors                         22.250000
Household & Personal Products          21.714286
Grocery Stores                         21.000000
Beverages—Brewers                      21.000000
Discount Stores                        19.200000
Scientific & Technical Instruments     19.000000
Electronic Components                  18.666667
Consumer Electronics                   17.000000
Real Estate Services                   17.000000
Software—Infrastructure                16.090909
Software—Application                   16.000000
Information Technology Services        14.700000
Semiconductor Equipment & Materials    14.500000
REIT—Hotel & Motel                     14.000000
Communicati

In [19]:
# calculate the average Environment Risk Score by industry
# only select industries within the top 3 sector with highest average Environment Risk Score
esg[esg['Sector'].isin(['Financial Services', 'Communication Services', 'Healthcare'])].groupby('Industry')['Environment Risk Score'].mean().sort_values(ascending=False) 

Industry
Telecom Services                          4.360000
Medical Instruments & Supplies            3.550000
Medical Devices                           3.125000
Medical Care Facilities                   2.500000
Pharmaceutical Retailers                  2.000000
Insurance—Specialty                       2.000000
Medical Distribution                      1.900000
Banks—Regional                            1.822222
Insurance—Diversified                     1.666667
Financial Data & Stock Exchanges          1.650000
Banks—Diversified                         1.600000
Asset Management                          1.533333
Insurance—Property & Casualty             1.400000
Drug Manufacturers—Specialty & Generic    1.400000
Internet Content & Information            1.350000
Capital Markets                           1.325000
Insurance—Life                            1.140000
Diagnostics & Research                    1.110000
Drug Manufacturers—General                1.088889
Biotechnology         

In [20]:
# calculate the average Governance Risk Score by industry
# only select industries within the top 3 sector with highest average Governance Risk Score
esg[esg['Sector'].isin(['Consumer Cyclical', 'Real Estate', 'Consumer Defensive'])].groupby('Industry')['Governance Risk Score'].mean().sort_values(ascending=False) 

Industry
Resorts & Casinos                     10.033333
Auto Manufacturers                     8.900000
Internet Retail                        8.450000
Tobacco                                7.900000
Personal Services                      7.000000
Footwear & Accessories                 6.900000
Farm Products                          6.766667
Household & Personal Products          6.214286
Travel Services                        5.880000
Real Estate Services                   5.700000
REIT—Residential                       5.666667
REIT—Office                            5.650000
Beverages—Brewers                      5.600000
REIT—Retail                            5.520000
Grocery Stores                         5.500000
Apparel Retail                         5.400000
REIT—Specialty                         5.400000
Residential Construction               5.350000
Discount Stores                        5.340000
Packaged Foods                         5.237500
Confectioners                  

In [21]:
# calculate the average Social Risk Score by industry
# only select industries within the top 3 sector with highest average Social Risk Score
esg[esg['Sector'].isin(['Real Estate', 'Technology', 'Basic Materials'])].groupby('Industry')['Social Risk Score'].mean().sort_values(ascending=False) 

Industry
Steel                                  13.700000
Copper                                 10.100000
Agricultural Inputs                     9.266667
Software—Infrastructure                 8.754545
Software—Application                    8.462500
Gold                                    8.200000
Information Technology Services         8.140000
Chemicals                               7.700000
Real Estate Services                    7.550000
Consumer Electronics                    6.900000
Scientific & Technical Instruments      6.900000
Semiconductors                          6.608333
Communication Equipment                 6.575000
REIT—Hotel & Motel                      6.000000
Specialty Chemicals                     5.833333
Computer Hardware                       5.720000
Electronic Components                   5.166667
Building Materials                      4.800000
REIT—Office                             4.350000
Semiconductor Equipment & Materials     4.075000
REIT—Retail

# 3. Company & ESG

In [23]:
# merge fin & esg datasets to investigate the relationship between company financial situation and ESG Risk Score
fin_esg = pd.merge(fin,esg,on='Symbol')
fin_esg.drop(columns=['Address','Sector','Industry','Full Time Employees','Description','Controversy Score','Controversy Level','ESG Risk Percentile','ESG Risk Level'], inplace=True)

In [24]:
fin_esg

Unnamed: 0,Symbol,market_value,latest_revenue,stock_volatility,Name,Total ESG Risk score,Environment Risk Score,Governance Risk Score,Social Risk Score,state
0,A,33349068800,6848000000,0.016883,Agilent Technologies Inc,15,0.3,6.3,8.6,CA
1,AAL,8103907840,48971000000,0.023060,American Airlines Group Inc,29,12.0,5.0,12.0,TX
2,AAP,3153115392,11154722000,0.031097,Advance Auto Parts Inc,12,0.0,3.0,8.0,NC
3,AAPL,2977583333376,383285000000,0.014296,Apple Inc,17,0.6,9.2,6.9,CA
4,ABBV,244191821824,58054000000,0.012325,Abbvie Inc,28,1.1,9.9,16.8,IL
...,...,...,...,...,...,...,...,...,...,...
427,XRAY,6343088640,3922000000,0.018855,Dentsply Sirona Inc,16,2.0,7.0,7.0,NC
428,XYL,24158427136,5522000000,0.015217,Xylem Inc,16,4.0,5.0,7.0,DC
429,YUM,35913060352,6842000000,0.010160,Yum Brands Inc,21,4.5,4.4,11.6,KY
430,ZBH,23510362112,6939900000,0.013304,Zimmer Biomet Holdings Inc,27,4.0,8.0,15.0,IN


In [25]:
# irrelevant
corr_market_value_esg = fin_esg[['market_value','Total ESG Risk score']].corr()
corr_market_value_esg

Unnamed: 0,market_value,Total ESG Risk score
market_value,1.0,-0.006791
Total ESG Risk score,-0.006791,1.0


In [26]:
# irrelevant
corr_market_value_gov = fin_esg[['market_value','Governance Risk Score']].corr()
corr_market_value_gov

Unnamed: 0,market_value,Governance Risk Score
market_value,1.0,0.137947
Governance Risk Score,0.137947,1.0


In [27]:
# irrelevant
corr_rev_esg = fin_esg[['latest_revenue','Total ESG Risk score']].corr()
corr_rev_esg

Unnamed: 0,latest_revenue,Total ESG Risk score
latest_revenue,1.0,0.088897
Total ESG Risk score,0.088897,1.0


In [28]:
# irrelevant
corr_rev_gov = fin_esg[['latest_revenue','Governance Risk Score']].corr()
corr_rev_gov

Unnamed: 0,latest_revenue,Governance Risk Score
latest_revenue,1.0,0.102637
Governance Risk Score,0.102637,1.0


In [29]:
# irrelevant
corr_stock_esg = fin_esg[['stock_volatility','Total ESG Risk score']].corr()
corr_stock_esg

Unnamed: 0,stock_volatility,Total ESG Risk score
stock_volatility,1.0,-0.012715
Total ESG Risk score,-0.012715,1.0


In [30]:
# irrelevant
corr_stock_gov = fin_esg[['stock_volatility','Governance Risk Score']].corr()
corr_stock_gov

Unnamed: 0,stock_volatility,Governance Risk Score
stock_volatility,1.0,0.079848
Governance Risk Score,0.079848,1.0
