## ESG Controversy - Descriptive Statistics

In [1]:
import pandas as pd
import numpy as np 
import os
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go 
from plotly.graph_objects import Layout
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
from tabulate import tabulate

In [2]:
# Import data
# The project root can be overridden with the THESIS_PROJECT_DIR environment
# variable so the notebook also runs on other machines; when it is unset the
# original location is used. All relative paths below ("data/...",
# "images/...") are resolved against this directory.
PROJECT_DIR = os.environ.get(
    "THESIS_PROJECT_DIR",
    r"//Users/mlvos/Desktop/Moritz/Education/Erasmus University/Master/Master Thesis_code/",
)
os.chdir(PROJECT_DIR)

# Both frames are indexed by (id, year).
df_merged = pd.read_csv("data/merged_data.csv", index_col=['id', 'year'])
df_merged2 = pd.read_csv("data/merged2_data.csv", index_col=['id', 'year'])

  df_merged2 = pd.read_csv("data/merged2_data.csv", index_col=['id', 'year'])


In [3]:
import plotly.io as pio

# Styling shared by the x and y axes of the thesis theme.
_axis_style = dict(
    zeroline=True,
    showline=True,
    mirror=False,
    linewidth=1,
    linecolor='black',
    gridcolor='white',
    title_font=dict(size=35),
)

# Boxed legend anchored near the top-left corner of the plotting area.
_legend_style = dict(
    title=None,
    x=0.03,
    xanchor="left",
    y=0.98,
    yanchor="top",
    font=dict(size=20),
    bordercolor="Black",
    borderwidth=1,
)

# Define Custom Theme
pio.templates['master_thesis'] = go.layout.Template(
    layout=go.Layout(
        font=dict(family='Times New Roman', size=30),
        xaxis=dict(_axis_style),
        # The y axis additionally always starts at zero.
        yaxis=dict(_axis_style, rangemode='tozero'),
        margin=dict(l=100, r=0, t=0, b=100),
        colorway=['#0055B3', '#FF2400'],
        legend=_legend_style,
        plot_bgcolor='rgb(242, 242, 242)',
    ),
)

# Make the theme the default for every figure created afterwards.
pio.templates.default = 'master_thesis'

### Environmental

In [4]:
# Round the ESG Score to whole numbers and store it as an integer column
# in both data sets.
for frame in (df_merged, df_merged2):
    frame['ESG Score'] = frame['ESG Score'].round().astype(int)

In [5]:
# Number of observations per (ESG Score, environmental controversy flag) pair;
# the count lands in the 'ESG Controversies Score' column.
df_environmental_esg = (
    df_merged
    .groupby(['ESG Score', 'Environmental_controversy_binary'])['ESG Controversies Score']
    .count()
    .reset_index()
)

# Store the flag as an integer-valued categorical so it is plotted as a
# discrete colour group.
df_environmental_esg['Environmental_controversy_binary'] = (
    df_environmental_esg['Environmental_controversy_binary']
    .astype(int)
    .astype('category')
)

In [6]:
# Plot Environmental Controversy by ESG Score
fig_environmental = px.bar(
    df_environmental_esg,
    x='ESG Score',
    y='ESG Controversies Score',
    color='Environmental_controversy_binary',
    barmode='overlay',
    opacity=0.8,
    width=800,
    height=600,
)

fig_environmental.update_yaxes(title='Count', dtick=50)
fig_environmental.update_xaxes(title='ESG Score', dtick=10)

fig_environmental.update_layout(legend_title="Environmental Controversy")

# Relabel the legend entries by trace value rather than by trace position:
# Plotly Express names each trace after its colour value ('0' / '1'), and the
# trace order depends on which value appears first in the data.
name_list = ['No', 'Yes']
legend_labels = {str(value): label for value, label in enumerate(name_list)}
fig_environmental.for_each_trace(
    lambda trace: trace.update(name=legend_labels.get(trace.name, trace.name))
)

fig_environmental.show()

# Single high-resolution export (a preceding default-resolution export to the
# same path would be overwritten immediately).
pio.write_image(fig_environmental, 'images/descriptives/environmental_by_esg.png', scale=5, width=800, height=600)


### Social

In [7]:
# Number of observations per (ESG Score, social controversy flag) pair;
# the count lands in the 'ESG Controversies Score' column.
social_counts = df_merged.groupby(by=['ESG Score', 'Social_controversy_binary'])['ESG Controversies Score'].count()
df_social_esg = social_counts.reset_index()

# Integer-valued categorical flag so it is plotted as a discrete colour group.
df_social_esg['Social_controversy_binary'] = (
    df_social_esg['Social_controversy_binary'].astype(int).astype('category')
)

In [8]:
# Plot Social Controversy by ESG Score
fig_social = px.bar(
    df_social_esg,
    x='ESG Score',
    y='ESG Controversies Score',
    color='Social_controversy_binary',
    barmode='overlay',
    opacity=0.8,
    width=800,
    height=600,
)

fig_social.update_yaxes(title='Count', dtick=50)
fig_social.update_xaxes(title='ESG Score', dtick=10)

fig_social.update_layout(legend_title="Social Controversy")

# Relabel the legend entries by trace value rather than by trace position:
# Plotly Express names each trace after its colour value ('0' / '1'), and the
# trace order depends on which value appears first in the data.
name_list = ['No', 'Yes']
legend_labels = {str(value): label for value, label in enumerate(name_list)}
fig_social.for_each_trace(
    lambda trace: trace.update(name=legend_labels.get(trace.name, trace.name))
)

fig_social.show()

# Single high-resolution export (a preceding default-resolution export to the
# same path would be overwritten immediately).
pio.write_image(fig_social, 'images/descriptives/social_by_esg.png', scale=5, width=800, height=600)


### Governance

In [9]:
# Number of observations per (ESG Score, governance controversy flag) pair;
# the count lands in the 'ESG Controversies Score' column.
df_governance_esg = (
    df_merged
    .groupby(by=['ESG Score', 'Governance_controversy_binary'])['ESG Controversies Score']
    .count()
    .reset_index()
)

# Integer-valued categorical flag so it is plotted as a discrete colour group.
governance_flag = df_governance_esg['Governance_controversy_binary'].astype(int)
df_governance_esg['Governance_controversy_binary'] = governance_flag.astype('category')

In [10]:
# Plot Governance Controversy by ESG Score
fig_governance = px.bar(
    df_governance_esg,
    x='ESG Score',
    y='ESG Controversies Score',
    color='Governance_controversy_binary',
    barmode='overlay',
    opacity=0.8,
    width=800,
    height=600,
)

fig_governance.update_yaxes(title='Count', dtick=50)
fig_governance.update_xaxes(title='ESG Score', dtick=10)

fig_governance.update_layout(legend_title="Governance Controversy")

# Relabel the legend entries by trace value rather than by trace position:
# Plotly Express names each trace after its colour value ('0' / '1'), and the
# trace order depends on which value appears first in the data.
name_list = ['No', 'Yes']
legend_labels = {str(value): label for value, label in enumerate(name_list)}
fig_governance.for_each_trace(
    lambda trace: trace.update(name=legend_labels.get(trace.name, trace.name))
)

fig_governance.show()

# Single high-resolution export (a preceding default-resolution export to the
# same path would be overwritten immediately).
pio.write_image(fig_governance, 'images/descriptives/governance_by_esg.png', scale=5, width=800, height=600)

## Repeat Figures but with unlagged data

### Environmental

In [11]:
# Same aggregation as for the first Environmental figure, but computed on
# df_merged2: observations per (ESG Score, environmental controversy flag).
environmental_counts = (
    df_merged2
    .groupby(by=['ESG Score', 'Environmental_controversy_binary'])['ESG Controversies Score']
    .count()
)
df_environmental_esg = environmental_counts.reset_index()

# Integer-valued categorical flag so it is plotted as a discrete colour group.
df_environmental_esg['Environmental_controversy_binary'] = (
    df_environmental_esg['Environmental_controversy_binary'].astype(int).astype('category')
)

In [12]:
# Plot Environmental Controversy by ESG Score (unlagged data)
fig_environmental = px.bar(
    df_environmental_esg,
    x='ESG Score',
    y='ESG Controversies Score',
    color='Environmental_controversy_binary',
    barmode='overlay',
    opacity=0.8,
    width=800,
    height=600,
)

fig_environmental.update_yaxes(title='Count', dtick=50)
fig_environmental.update_xaxes(title='ESG Score', dtick=10)

fig_environmental.update_layout(legend_title="Environmental Controversy")

# Relabel the legend entries by trace value rather than by trace position:
# Plotly Express names each trace after its colour value ('0' / '1'), and the
# trace order depends on which value appears first in the data.
name_list = ['No', 'Yes']
legend_labels = {str(value): label for value, label in enumerate(name_list)}
fig_environmental.for_each_trace(
    lambda trace: trace.update(name=legend_labels.get(trace.name, trace.name))
)

fig_environmental.show()

# Single high-resolution export (a preceding default-resolution export to the
# same path would be overwritten immediately).
pio.write_image(fig_environmental, 'images/descriptives/environmental_by_esg_unlagged.png', scale=5, width=800, height=600)

### Social

In [13]:
# Observations per (ESG Score, social controversy flag) pair for the
# unlagged figures. This must be computed on df_merged2, the frame the
# unlagged Environmental figure in this section uses; grouping df_merged here
# would merely reproduce the earlier lagged Social figure under the
# "_unlagged" file name.
df_social_esg = (
    df_merged2
    .groupby(by=['ESG Score', 'Social_controversy_binary'])['ESG Controversies Score']
    .count()
    .reset_index()
)

# Integer-valued categorical flag so it is plotted as a discrete colour group.
df_social_esg['Social_controversy_binary'] = (
    df_social_esg['Social_controversy_binary'].astype(int).astype('category')
)

In [14]:
# Plot Social Controversy by ESG Score (unlagged figure)
fig_social = px.bar(
    df_social_esg,
    x='ESG Score',
    y='ESG Controversies Score',
    color='Social_controversy_binary',
    barmode='overlay',
    opacity=0.8,
    width=800,
    height=600,
)

fig_social.update_yaxes(title='Count', dtick=50)
fig_social.update_xaxes(title='ESG Score', dtick=10)

fig_social.update_layout(legend_title="Social Controversy")

# Relabel the legend entries by trace value rather than by trace position:
# Plotly Express names each trace after its colour value ('0' / '1'), and the
# trace order depends on which value appears first in the data.
name_list = ['No', 'Yes']
legend_labels = {str(value): label for value, label in enumerate(name_list)}
fig_social.for_each_trace(
    lambda trace: trace.update(name=legend_labels.get(trace.name, trace.name))
)

fig_social.show()

# Single high-resolution export (a preceding default-resolution export to the
# same path would be overwritten immediately).
pio.write_image(fig_social, 'images/descriptives/social_by_esg_unlagged.png', scale=5, width=800, height=600)

### Governance

In [15]:
# Observations per (ESG Score, governance controversy flag) pair for the
# unlagged figures. This must be computed on df_merged2, the frame the
# unlagged Environmental figure in this section uses; grouping df_merged here
# would merely reproduce the earlier lagged Governance figure under the
# "_unlagged" file name.
df_governance_esg = (
    df_merged2
    .groupby(by=['ESG Score', 'Governance_controversy_binary'])['ESG Controversies Score']
    .count()
    .reset_index()
)

# Integer-valued categorical flag so it is plotted as a discrete colour group.
df_governance_esg['Governance_controversy_binary'] = (
    df_governance_esg['Governance_controversy_binary'].astype(int).astype('category')
)

In [16]:
# Plot Governance Controversy by ESG Score (unlagged figure)
fig_governance = px.bar(
    df_governance_esg,
    x='ESG Score',
    y='ESG Controversies Score',
    color='Governance_controversy_binary',
    barmode='overlay',
    opacity=0.8,
    width=800,
    height=600,
)

fig_governance.update_yaxes(title='Count', dtick=50)
fig_governance.update_xaxes(title='ESG Score', dtick=10)

fig_governance.update_layout(legend_title="Governance Controversy")

# Relabel the legend entries by trace value rather than by trace position:
# Plotly Express names each trace after its colour value ('0' / '1'), and the
# trace order depends on which value appears first in the data.
name_list = ['No', 'Yes']
legend_labels = {str(value): label for value, label in enumerate(name_list)}
fig_governance.for_each_trace(
    lambda trace: trace.update(name=legend_labels.get(trace.name, trace.name))
)

fig_governance.show()

# Single high-resolution export (a preceding default-resolution export to the
# same path would be overwritten immediately).
pio.write_image(fig_governance, 'images/descriptives/governance_by_esg_unlagged.png', scale=5, width=800, height=600)

## Correlation Plot

Environmental

In [17]:
# Correlation input: df_merged without the combined/controversy score columns
# and the raw per-pillar controversy counts.
df_correlation = df_merged.drop(
    columns=[
        'ESG Combined Score',
        'ESG Controversies Score',
        'Environmental Controversies Count',
        'Social Controversies Count',
        'Governance Controversies Count',
    ]
)
# df_corr = df_correlation.corr(numeric_only=True)

# thresh = 0.0

# # get correlation matrix
# df_corr = df_corr.corr().abs().unstack()

# # filter
# df_corr_filt = df_corr[(df_corr>thresh) | (df_corr<-thresh)].reset_index()

# # deduplicate
# df_corr = df_corr_filt.iloc[df_corr_filt[['level_0','level_1']].apply(lambda r: ''.join(map(str, sorted(r))), axis = 1).drop_duplicates().index]

In [18]:
# df_corr.loc[df_corr['level_0'] == 'Environmental_controversy_binary', :]

In [19]:
# Rank all numeric columns by absolute correlation with the environmental
# controversy flag (the flag itself is part of the ranking).
df_corr = df_correlation.corr(numeric_only=True)
corr_matrix = (
    df_corr['Environmental_controversy_binary']
    .abs()
    .sort_values(ascending=False)
)
top_corr_features = corr_matrix.iloc[:10].index  # TODO: Change to 20

# Correlation matrix restricted to the ten top-ranked features.
df_corr = df_correlation[top_corr_features].corr(numeric_only=True)

# Display labels are assigned by position, so they must follow the order of
# `top_corr_features`.
# NOTE(review): the list is hard-coded - re-check it whenever the data changes.
df_corr.columns = [
    'Environmental Controversy',
    'Net Assets',
    'Social Controversy',
    'CSR Strategy Score',
    'Emissions Score',
    'ESG Score',
    'Environmental Pillar Score',
    'Stakeholder Engagement Score',
    'Resource Use Score',
    'Integrated Strategy in MD&A Score',
]

In [20]:
# Annotated heatmap of the environmental top-correlation matrix; the display
# labels stored in df_corr.columns are used for both axes.
fig_heatmap = ff.create_annotated_heatmap(
    z=df_corr.to_numpy().round(2),
    x=list(df_corr.columns.values),
    y=list(df_corr.columns.values),
    xgap=3, ygap=3,
    zmin=-1, zmax=1,
    colorscale=["red", "yellow", "green"],
    showscale=True,
    colorbar_thickness=30,
    colorbar_ticklen=3,
    colorbar=dict(tickfont=dict(size=40)),
)

fig_heatmap.update_xaxes(tickangle=30)

fig_heatmap.update_layout(
    xaxis_showgrid=False,
    xaxis={'side': 'bottom', 'tickfont': dict(size=45)},
    yaxis={'tickfont': dict(size=45)},
    yaxis_showgrid=False,
    yaxis_autorange='reversed',
    paper_bgcolor=None,
    height=1200,
    width=2200,
    # Large left/bottom margins leave room for the long tick labels.
    margin=dict(l=700, r=550, t=20, b=400),
)

# Set the font size of the per-cell value annotations.
for annotation in fig_heatmap.layout.annotations:
    annotation.font.size = 40

fig_heatmap.show()

# Single high-resolution export (a preceding default-resolution export to the
# same path would be overwritten immediately).
pio.write_image(fig_heatmap, 'images/descriptives/environmental_correlation.png', scale=5, width=2200, height=1200)

Social

In [21]:
# Correlation input: df_merged without the combined/controversy score columns
# and the raw per-pillar controversy counts.
df_correlation = df_merged.drop(
    columns=[
        'ESG Combined Score',
        'ESG Controversies Score',
        'Environmental Controversies Count',
        'Social Controversies Count',
        'Governance Controversies Count',
    ]
)

# Rank all numeric columns by absolute correlation with the social
# controversy flag and keep the ten top-ranked ones.
df_corr = df_correlation.corr(numeric_only=True)
corr_matrix = (
    df_corr['Social_controversy_binary']
    .abs()
    .sort_values(ascending=False)
)
top_corr_features = corr_matrix.iloc[:10].index
df_corr = df_correlation[top_corr_features].corr(numeric_only=True)

# Display labels are assigned by position, so they must follow the order of
# `top_corr_features`.
# NOTE(review): the list is hard-coded - re-check it whenever the data changes.
df_corr.columns = [
    'Social Controversy',
    'Net Assets',
    'Social Pillar Score',
    'Resource Use Score',
    'ESG Score',
    'Environmental Pillar Score',
    'Emissions Score',
    'Human Rights Score',
    'CSR Strategy Score',
    'Workforce Score',
]

In [22]:
# Annotated heatmap of the social top-correlation matrix; the display labels
# stored in df_corr.columns are used for both axes.
fig_heatmap = ff.create_annotated_heatmap(
    z=df_corr.to_numpy().round(2),
    x=list(df_corr.columns.values),
    y=list(df_corr.columns.values),
    xgap=3, ygap=3,
    zmin=-1, zmax=1,
    colorscale=["red", "yellow", "green"],
    showscale=True,
    colorbar_thickness=30,
    colorbar_ticklen=3,
    colorbar=dict(tickfont=dict(size=40)),
)

fig_heatmap.update_xaxes(tickangle=30)

fig_heatmap.update_layout(
    xaxis_showgrid=False,
    xaxis={'side': 'bottom', 'tickfont': dict(size=45)},
    yaxis={'tickfont': dict(size=45)},
    yaxis_showgrid=False,
    yaxis_autorange='reversed',
    paper_bgcolor=None,
    height=1200,
    width=1600,
    # Large left/bottom margins leave room for the long tick labels.
    margin=dict(l=550, r=250, t=20, b=300),
)

# Set the font size of the per-cell value annotations.
for annotation in fig_heatmap.layout.annotations:
    annotation.font.size = 40

fig_heatmap.show()

# Single high-resolution export (a preceding default-resolution export to the
# same path would be overwritten immediately).
pio.write_image(fig_heatmap, 'images/descriptives/social_correlation.png', scale=5, width=1600, height=1200)

Governance

In [23]:
# Correlation input: df_merged without the combined/controversy score columns
# and the raw per-pillar controversy counts.
df_correlation = df_merged.drop(
    columns=[
        'ESG Combined Score',
        'ESG Controversies Score',
        'Environmental Controversies Count',
        'Social Controversies Count',
        'Governance Controversies Count',
    ]
)

# Rank all numeric columns by absolute correlation with the governance
# controversy flag and keep the ten top-ranked ones.
df_corr = df_correlation.corr(numeric_only=True)
corr_matrix = (
    df_corr['Governance_controversy_binary']
    .abs()
    .sort_values(ascending=False)
)
top_corr_features = corr_matrix.iloc[:10].index  # TODO: Change to 20
df_corr = df_correlation[top_corr_features].corr(numeric_only=True)

# Display labels are assigned by position, so they must follow the order of
# `top_corr_features`.
# NOTE(review): the list is hard-coded - re-check it whenever the data changes.
df_corr.columns = [
    'Governance Controversy',
    'Net Assets',
    'Social Controversy',
    'Total Assets',
    'Total Liabilities',
    'Recent Social Controversies',
    'Social Pillar Score',
    'Environmental Pillar Score',
    'Emissions Score',
    'ESG Score',
]

In [24]:
# Annotated heatmap of the governance top-correlation matrix; the display
# labels stored in df_corr.columns are used for both axes.
fig_heatmap = ff.create_annotated_heatmap(
    z=df_corr.to_numpy().round(2),
    x=list(df_corr.columns.values),
    y=list(df_corr.columns.values),
    xgap=3, ygap=3,
    zmin=-1, zmax=1,
    colorscale=["red", "yellow", "green"],
    showscale=True,
    colorbar_thickness=30,
    colorbar_ticklen=3,
    colorbar=dict(tickfont=dict(size=40)),
)

fig_heatmap.update_xaxes(tickangle=30)

fig_heatmap.update_layout(
    xaxis_showgrid=False,
    xaxis={'side': 'bottom', 'tickfont': dict(size=45)},
    yaxis={'tickfont': dict(size=45)},
    yaxis_showgrid=False,
    yaxis_autorange='reversed',
    paper_bgcolor=None,
    height=1200,
    width=1600,
    # Large left/bottom margins leave room for the long tick labels.
    margin=dict(l=550, r=220, t=20, b=320),
)

# Set the font size of the per-cell value annotations.
for annotation in fig_heatmap.layout.annotations:
    annotation.font.size = 40

fig_heatmap.show()

# Single high-resolution export (a preceding default-resolution export to the
# same path would be overwritten immediately).
pio.write_image(fig_heatmap, 'images/descriptives/governance_correlation.png', scale=5, width=1600, height=1200)

In [25]:
# Correlation input for the overall controversy score: drop the combined
# score, the three binary controversy flags and the raw controversy counts,
# but keep 'ESG Controversies Score' itself as the ranking target.
df_correlation = df_merged.drop(
    columns=[
        'ESG Combined Score',
        'Environmental_controversy_binary',
        'Social_controversy_binary',
        'Governance_controversy_binary',
        'Environmental Controversies Count',
        'Social Controversies Count',
        'Governance Controversies Count',
    ]
)

# Rank all numeric columns by absolute correlation with the controversy score
# and keep the ten top-ranked ones.
df_corr = df_correlation.corr(numeric_only=True)
corr_matrix = df_corr['ESG Controversies Score'].abs().sort_values(ascending=False)
top_corr_features = corr_matrix[:10].index  # TODO: Change to 20
df_corr = df_correlation[top_corr_features].corr(numeric_only=True)

# Display labels are assigned by position, so they must follow the order of
# `top_corr_features`.
# NOTE(review): the list is hard-coded - re-check it whenever the data changes.
df_corr.columns = [
    'ESG Controversies Score',
    'Net Assets',
    'Total Assets',
    'ESG Score',
    'Social Pillar Score',
    'Recent Social Controversies',
    'Total Liabilities',
    'Resource Use Score',
    'Human Rights Score',
    'Environmental Pillar Score',
]

In [26]:
# Annotated heatmap of the controversy-score top-correlation matrix; the
# display labels stored in df_corr.columns are used for both axes.
heatmap_labels = list(df_corr.columns.values)
fig_heatmap = ff.create_annotated_heatmap(
    z=df_corr.to_numpy().round(2),
    x=heatmap_labels,
    y=list(heatmap_labels),
    xgap=3,
    ygap=3,
    zmin=-1,
    zmax=1,
    colorscale=["red", "yellow", "green"],
    showscale=True,
    colorbar_thickness=30,
    colorbar_ticklen=3,
)

# x labels at the bottom, y axis reversed so the first feature is on top,
# no grid lines on either axis.
fig_heatmap.update_layout(
    xaxis=dict(side='bottom', showgrid=False),
    yaxis=dict(showgrid=False, autorange='reversed'),
    paper_bgcolor=None,
    width=1600,
    height=1200,
    margin=dict(l=350, r=250, t=20, b=250),
)

fig_heatmap.show()
fig_heatmap.write_image("images/descriptives/controversy_correlation.png")

### Descriptive Statistics

In [27]:

# Create subset of most relevant columns: headline scores, pillar scores,
# raw controversy counts and the binary controversy flags.
subset_columns = [
    'ESG Score',
    'ESG Controversies Score',
    'Social Pillar Score',
    'Governance Pillar Score',
    'Environmental Pillar Score',
    'Environmental Controversies Count',
    'Social Controversies Count',
    'Governance Controversies Count',
    'Environmental_controversy_binary',
    'Social_controversy_binary',
    'Governance_controversy_binary',
]
df_subset = df_merged.loc[:, subset_columns]

In [28]:
# Describe
# Summary statistics of the selected columns; no percentiles are requested
# explicitly (the recorded output contains only the median).
df_des = df_subset.describe(percentiles=[])

# Write next to the input data using a project-relative path, consistent with
# the other data/ and images/ paths in this notebook, which are resolved
# against the working directory set in the data-import cell.
df_des.to_excel("data/des_stats.xlsx")

df_des

Unnamed: 0,ESG Score,ESG Controversies Score,Social Pillar Score,Governance Pillar Score,Environmental Pillar Score,Environmental Controversies Count,Social Controversies Count,Governance Controversies Count,Environmental_controversy_binary,Social_controversy_binary,Governance_controversy_binary
count,29979.0,29979.0,29979.0,29979.0,29979.0,29979.0,29979.0,29979.0,29979.0,29979.0,29979.0
mean,49.843691,89.835558,50.178581,53.3862,45.73467,0.020214,0.570833,0.041429,0.012776,0.171186,0.026885
std,21.005252,23.782422,24.507268,22.385753,28.229253,0.234447,2.660802,0.32243,0.112307,0.376678,0.161751
min,1.0,0.490196,0.052692,0.100775,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,51.0,100.0,50.90505,55.054533,47.806793,0.0,0.0,0.0,0.0,0.0,0.0
max,95.0,100.0,99.564164,99.462322,99.164037,8.0,106.0,15.0,1.0,1.0,1.0


In [29]:
# Convert to latex table
# One row per variable (df_des transposed), one column per statistic.

# 'ESG Score',
#                               'ESG Controversies Score',
#                       'Social Pillar Score',
#                       'Governance Pillar Score',
#                       'Environmental Pillar Score',
#                       'Environmental Controversies Count',
#                       'Social Controversies Count',
#                       'Governance Controversies Count',
#                       'Environmental Controversy',
#                       'Social Controversy',
#                       'Governance Controversy'
# Header spelling fixed ("Std Deviation") because it is emitted verbatim into
# the LaTeX table.
print(tabulate(df_des.T, headers=['Count', 'Mean', 'Std Deviation', 'Min', '50%', 'Max'], tablefmt='latex'))

\begin{tabular}{lrrrrrr}
\hline
                                   &   Count &       Mean &   Std Deaviation &       Min &      50\% &      Max \\
\hline
 ESG Score                         &   29979 & 49.8437    &        21.0053   & 1         &  51      &  95      \\
 ESG Controversies Score           &   29979 & 89.8356    &        23.7824   & 0.490196  & 100      & 100      \\
 Social Pillar Score               &   29979 & 50.1786    &        24.5073   & 0.0526917 &  50.9051 &  99.5642 \\
 Governance Pillar Score           &   29979 & 53.3862    &        22.3858   & 0.100775  &  55.0545 &  99.4623 \\
 Environmental Pillar Score        &   29979 & 45.7347    &        28.2293   & 0         &  47.8068 &  99.164  \\
 Environmental Controversies Count &   29979 &  0.0202141 &         0.234447 & 0         &   0      &   8      \\
 Social Controversies Count        &   29979 &  0.570833  &         2.6608   & 0         &   0      & 106      \\
 Governance Controversies Count    &   29979 &  

In [30]:
# Create table of variable for appendix
# The column names of df_merged are split after the 55th entry into two
# lists, each wrapped in a single-column DataFrame.
all_columns = list(df_merged.columns)
appendix_list_1, appendix_list_2 = all_columns[:55], all_columns[55:]

appendix_df_1, appendix_df_2 = pd.DataFrame(appendix_list_1), pd.DataFrame(appendix_list_2)

In [31]:
# Render the first part of the variable list as a LaTeX table.
appendix_latex_1 = tabulate(appendix_df_1, headers=['Variables'], tablefmt='latex')
print(appendix_latex_1)

\begin{tabular}{rl}
\hline
    & Variables                                           \\
\hline
  0 & ESG Score                                           \\
  1 & ESG Combined Score                                  \\
  2 & ESG Controversies Score                             \\
  3 & Social Pillar Score                                 \\
  4 & Governance Pillar Score                             \\
  5 & Environmental Pillar Score                          \\
  6 & Resource Use Score                                  \\
  7 & Emissions Score                                     \\
  8 & Environmental Innovation Score                      \\
  9 & Workforce Score                                     \\
 10 & Human Rights Score                                  \\
 11 & Community Score                                     \\
 12 & Product Responsibility Score                        \\
 13 & Management Score                                    \\
 14 & Shareholders Score                           

In [32]:
# Render the second part of the variable list as a LaTeX table.
appendix_latex_2 = tabulate(appendix_df_2, headers=['Variables'], tablefmt='latex')
print(appendix_latex_2)

\begin{tabular}{rl}
\hline
    & Variables                                           \\
\hline
  0 & External\_Consultants\_Score                          \\
  1 & CEO\_Compensation\_Link\_to\_TSR\_Score                  \\
  2 & Shareholders\_Approval\_Stock\_Compensation\_Plan\_Score \\
  3 & Board\_Functions\_Policy\_Score                        \\
  4 & Board\_Structure\_Policy\_Score                        \\
  5 & Audit\_Committee\_Mgt\_Independence\_Score              \\
  6 & Audit\_Committee\_Expertise\_Score                     \\
  7 & Compensation\_Committee\_Mgt\_Independence\_Score       \\
  8 & Nomination\_Committee\_Mgt\_Independence\_Score         \\
  9 & Board\_Attendance\_Score                              \\
 10 & Board\_Size\_More\_Ten\_Less\_Eight\_Score                \\
 11 & Board\_Background\_and\_Skills\_Score                   \\
 12 & Board\_Gender\_Diversity.\_Percent\_Score               \\
 13 & Board\_Specific\_Skills.\_Percent\_Score                \

In [33]:
# All observations flagged with a social controversy.
df_merged.loc[df_merged['Social_controversy_binary'].eq(1)]

Unnamed: 0_level_0,Unnamed: 1_level_0,ESG Score,ESG Combined Score,ESG Controversies Score,Social Pillar Score,Governance Pillar Score,Environmental Pillar Score,Resource Use Score,Emissions Score,Environmental Innovation Score,Workforce Score,...,Recent Governance Controversies,Environmental_controversy_binary,Social_controversy_binary,Governance_controversy_binary,ISIN Code,GICS Industry Group Name,total assets,total liabilities,country,net assets
id,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
000063.SZ,2014,46,45.540825,100.000000,41.571757,29.733045,80.363322,73.529412,55.882353,96.875000,81.250000,...,0.0,0.0,1.0,0.0,CNE000000TK5,Technology Hardware & Equipment,,,,
000063.SZ,2015,52,45.884017,40.000000,50.905390,38.949204,75.894899,60.526316,52.777778,97.058824,58.000000,...,0.0,0.0,1.0,0.0,CNE000000TK5,Technology Hardware & Equipment,,,,
000063.SZ,2016,55,54.815984,66.666667,56.524916,44.148749,70.658263,70.000000,60.416667,76.190476,81.944444,...,0.0,0.0,1.0,0.0,CNE000000TK5,Technology Hardware & Equipment,,,,
000063.SZ,2017,61,41.212636,21.428571,57.619052,50.836439,84.820903,76.562500,76.785714,94.000000,87.500000,...,0.0,0.0,1.0,0.0,CNE000000TK5,Technology Hardware & Equipment,,,,
000063.SZ,2018,74,70.246061,66.666667,80.575417,61.699876,83.444192,78.750000,79.729730,88.235294,88.095238,...,0.0,0.0,1.0,0.0,CNE000000TK5,Technology Hardware & Equipment,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZBH.N,2017,46,36.872215,27.777778,60.978160,43.280717,9.368337,17.164179,12.500000,0.000000,66.071429,...,0.0,0.0,1.0,0.0,US98956P1021,Health Care Equipment & Services,25964.5,14229.0,USA,11735.5
ZURN.S,2013,59,59.241771,60.714286,45.854084,72.011494,81.106172,58.988764,81.958763,66.000000,50.347222,...,0.0,0.0,1.0,0.0,CH0011075394,Insurance,,,,
ZURN.S,2014,63,62.775613,90.909091,49.090751,74.487179,83.357360,56.593407,83.823529,79.090909,62.671233,...,0.0,0.0,1.0,0.0,CH0011075394,Insurance,,,,
ZURN.S,2020,88,87.730177,100.000000,85.499842,88.080071,94.486669,98.188406,94.025157,93.406593,92.954545,...,0.0,0.0,1.0,0.0,CH0011075394,Insurance,,,,


In [34]:
# Observations with more than 80 social controversies, largest count first.
(
    df_merged2
    .loc[df_merged2['Social Controversies Count'].gt(80),
         ['ESG Score', 'Social Controversies Count']]
    .sort_values(by='Social Controversies Count', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,ESG Score,Social Controversies Count
id,year,Unnamed: 2_level_1,Unnamed: 3_level_1
META.OQ,2019,43,106.0
GOOGL.OQ,2021,79,98.0
META.OQ,2020,49,93.0
VOWG_p.DE,2017,80,90.0
GOOGL.OQ,2020,70,87.0
BA.N,2020,80,83.0


In [35]:
# Observations with more than 5 environmental controversies, largest count first.
(
    df_merged2
    .loc[df_merged2['Environmental Controversies Count'].gt(5),
         ['ESG Score', 'Environmental Controversies Count']]
    .sort_values(by='Environmental Controversies Count', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,ESG Score,Environmental Controversies Count
id,year,Unnamed: 2_level_1,Unnamed: 3_level_1
ENB.TO,2017,83,8.0
XOM.N,2020,72,8.0
MBGn.DE,2020,93,8.0
SHEL.L,2017,90,8.0
VALE3.SA,2016,85,7.0
BHP.AX,2017,90,7.0
MBGn.DE,2019,94,7.0
VALE3.SA,2017,78,6.0
ENB.TO,2020,80,6.0
XOM.N,2017,75,6.0


In [36]:
# Observations with more than 10 governance controversies, largest count first.
(
    df_merged2
    .loc[df_merged2['Governance Controversies Count'].gt(10),
         ['ESG Score', 'Governance Controversies Count']]
    .sort_values(by='Governance Controversies Count', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,ESG Score,Governance Controversies Count
id,year,Unnamed: 2_level_1,Unnamed: 3_level_1
GS.N,2022,85,19.0
SPWR.OQ,2022,54,17.0
CSGN.S,2022,85,17.0
SNHJ.J,2019,43,15.0
DVA.N,2019,60,13.0


In [37]:
# Governance controversy flag of META.OQ per year in df_merged
# (selected on the first index level, the company id).
df_merged.loc[
    df_merged.index.get_level_values(0) == 'META.OQ',
    'Governance_controversy_binary',
].sort_index()

id       year
META.OQ  2013    1.0
         2014    0.0
         2015    1.0
         2016    0.0
         2017    0.0
         2018    1.0
         2019    1.0
         2020    1.0
         2021    1.0
Name: Governance_controversy_binary, dtype: float64

In [38]:
# Governance controversy flag of META.OQ per year in df_merged2
# (selected on the first index level, the company id).
df_merged2.loc[
    df_merged2.index.get_level_values(0) == 'META.OQ',
    'Governance_controversy_binary',
].sort_index()

id       year
META.OQ  2013    1
         2014    1
         2015    0
         2016    1
         2017    0
         2018    0
         2019    1
         2020    1
         2021    1
         2022    1
Name: Governance_controversy_binary, dtype: int64

## Additional Figures

### ESG Score by Year

In [39]:
# Turn the (id, year) index levels into ordinary columns so 'year' can be
# used as a grouping / plotting column below.
# Guarded so that re-running the cell is a no-op: once the MultiIndex has been
# flattened, another reset_index() would only add a spurious 'index' column.
if isinstance(df_merged.index, pd.MultiIndex):
    df_merged = df_merged.reset_index()
if isinstance(df_merged2.index, pd.MultiIndex):
    df_merged2 = df_merged2.reset_index()

In [40]:
# Observations per (environmental controversy flag, year) pair, stored in a
# 'Count' column - once for df_merged and once for df_merged2.
df_environmental_esg_year = (
    df_merged
    .groupby(by=['Environmental_controversy_binary', 'year'])['ESG Score']
    .count()
    .reset_index(name='Count')
)
df_environmental_esg_year2 = (
    df_merged2
    .groupby(by=['Environmental_controversy_binary', 'year'])['ESG Score']
    .count()
    .reset_index(name='Count')
)

In [41]:
# Store the controversy flag of both yearly tables as an integer-valued
# categorical so it is plotted as a discrete colour group.
for yearly_counts in (df_environmental_esg_year, df_environmental_esg_year2):
    yearly_counts['Environmental_controversy_binary'] = (
        yearly_counts['Environmental_controversy_binary']
        .astype(int)
        .astype('category')
    )

In [42]:
# Inspect the column names of the yearly count table before plotting.
df_environmental_esg_year.columns

Index(['Environmental_controversy_binary', 'year', 'Count'], dtype='object')

In [43]:
# Plot environmental controversy counts by year (lagged data, built from df_merged)
fig_environmental = px.bar(
    df_environmental_esg_year,
    x='year',
    y='Count',
    color='Environmental_controversy_binary',
    barmode='overlay',
    opacity=0.8,
    width=800,
    height=600,
)

fig_environmental.update_yaxes(title='Count', dtick=1000, title_font=dict(size=18))
fig_environmental.update_xaxes(title='Year', dtick=2, title_font=dict(size=18))

# Smaller legend and tick fonts than the theme defaults for this figure.
fig_environmental.update_layout(
    legend=dict(title=None, font=dict(size=15)),
    legend_title="Environmental Controversy",
    yaxis=dict(tickfont=dict(size=20)),
    xaxis=dict(tickfont=dict(size=20)),
)

# Relabel the legend entries by trace value rather than by trace position:
# Plotly Express names each trace after its colour value ('0' / '1').
name_list = ['No', 'Yes']
legend_labels = {str(value): label for value, label in enumerate(name_list)}
fig_environmental.for_each_trace(
    lambda trace: trace.update(name=legend_labels.get(trace.name, trace.name))
)

fig_environmental.show()

fig_environmental.write_image("images/descriptives/environmental_by_esg_year.png")

In [44]:
# Plot environmental controversy counts by year (built from df_merged2)
fig_environmental = px.bar(
    df_environmental_esg_year2,
    x='year',
    y='Count',
    color='Environmental_controversy_binary',
    barmode='overlay',
    opacity=0.8,
    width=800,
    height=600,
)

fig_environmental.update_yaxes(title='Count', dtick=1000, title_font=dict(size=18))
fig_environmental.update_xaxes(title='Year', dtick=2, title_font=dict(size=18))

# Smaller legend and tick fonts than the theme defaults for this figure.
fig_environmental.update_layout(
    legend=dict(title=None, font=dict(size=15)),
    legend_title="Environmental Controversy",
    yaxis=dict(tickfont=dict(size=20)),
    xaxis=dict(tickfont=dict(size=20)),
)

# Relabel the legend entries by trace value rather than by trace position:
# Plotly Express names each trace after its colour value ('0' / '1').
name_list = ['No', 'Yes']
legend_labels = {str(value): label for value, label in enumerate(name_list)}
fig_environmental.for_each_trace(
    lambda trace: trace.update(name=legend_labels.get(trace.name, trace.name))
)

fig_environmental.show()

fig_environmental.write_image("images/descriptives/environmental_by_esg_year2.png")

### Social Controversy - show outlier

In [45]:
# Total number of social controversies per year, with `year` as a regular column.
df_soical_outlier_trend = (
    df_merged.groupby(by=['year'])['Social Controversies Count']
    .sum()
    .to_frame()
    .reset_index()
)

In [46]:
# Lagged: yearly total of social controversies as a line chart
fig = px.line(
    data_frame=df_soical_outlier_trend,
    x="year",
    y="Social Controversies Count",
)
fig.show()

In [47]:
# Flatten the (id, year) index so `year` becomes a regular column.
df_tmp = df_merged.reset_index()

# Keep only year and the social controversy count. The trailing reset_index()
# additionally stores the previous row labels in a new `index` column.
df_soical_outlier = (
    df_tmp.loc[:, ['year', 'Social Controversies Count']]
    .reset_index()
)

In [48]:
# Lagged Social: one point per observation (year vs. social controversy count),
# used to make outliers visible.
fig = px.scatter(df_soical_outlier, x="year",
                 y="Social Controversies Count",
                 width=800,
                 height=600)

fig.update_yaxes(title = 'Social Controversies Count')
fig.update_xaxes(title = 'Year', dtick = 2)

fig.show()

# Export the figure once. A preceding 1080x1080 export to this very path was
# dropped because it was immediately overwritten by this call; the file on disk
# is unchanged. `scale=5` renders the 800x600 layout at five times the
# resolution.
fig.write_image("images/descriptives/scatter_social.png", scale=5, width=800, height=600)

In [49]:
# Lagged Environmental
# Flatten the (id, year) index so `year` becomes a regular column.
df_tmp = df_merged.reset_index()

# Keep only year and the environmental controversy count. The trailing
# reset_index() additionally stores the previous row labels in an `index` column.
df_environmental_outlier = (
    df_tmp.loc[:, ['year', 'Environmental Controversies Count']]
    .reset_index()
)

In [50]:
# Scatter of environmental controversy counts per observation and year.
fig = px.scatter(
    df_environmental_outlier,
    x="year",
    y="Environmental Controversies Count",
    width=800,
    height=600,
)

# Axis titles and tick labels; `title` precedes `title_font` so the font is
# applied to the freshly assigned title.
fig.update_yaxes(
    title='Environmental Controversies Count',
    title_font=dict(size=18),
    tickfont=dict(size=20),
)
fig.update_xaxes(
    title='Year',
    title_font=dict(size=18),
    tickfont=dict(size=20),
)

fig.show()

fig.write_image("images/descriptives/scatter_environmental.png")

In [51]:
# Lagged Governance
# Flatten the (id, year) index so `year` becomes a regular column.
df_tmp = df_merged.reset_index()

# Keep only year and the governance controversy count. The trailing
# reset_index() additionally stores the previous row labels in an `index` column.
df_governance_outlier = (
    df_tmp.loc[:, ['year', 'Governance Controversies Count']]
    .reset_index()
)

In [52]:
# Scatter of governance controversy counts per observation and year.
fig = px.scatter(
    df_governance_outlier,
    x="year",
    y="Governance Controversies Count",
    width=800,
    height=600,
)

# Axis titles and tick labels; `title` precedes `title_font` so the font is
# applied to the freshly assigned title.
fig.update_yaxes(
    title='Governance Controversies Count',
    title_font=dict(size=18),
    tickfont=dict(size=20),
)
fig.update_xaxes(
    title='Year',
    title_font=dict(size=18),
    tickfont=dict(size=20),
)

fig.show()

fig.write_image("images/descriptives/scatter_governance.png")

### Differences between lagged and not lagged

In [53]:
# Number of observations per (social controversy flag, year) in df_merged.
df_lagged = (
    df_merged.groupby(by=['Social_controversy_binary', 'year'])['ESG Score']
    .count()
    .to_frame(name='Count')
    .reset_index()
)

# int -> category, so the flag can serve as a discrete colour grouping.
df_lagged['Social_controversy_binary'] = (
    df_lagged['Social_controversy_binary']
    .astype(int)
    .astype('category')
)

In [54]:
# Plot social controversy counts per year (lagged).
# One overlaid bar series per value of the binary controversy flag.
fig_social = px.bar(
    df_lagged,
    x='year',
    y='Count',
    color='Social_controversy_binary',
    barmode='overlay',
    opacity=1.0,
    width=800,
    height=600,
)

# `title` is passed before `title_font` on purpose: assigning a plain string
# title replaces the axis title object, so the font has to be applied afterwards.
fig_social.update_yaxes(title='Count', dtick=1000, title_font=dict(size=18),
                        tickfont=dict(size=20))
fig_social.update_xaxes(title='Year', dtick=2, title_font=dict(size=18),
                        tickfont=dict(size=20))

# Single, unambiguous legend specification (previously `title=None` and
# `legend_title=...` were both passed in the same call).
fig_social.update_layout(
    legend=dict(
        title=dict(text="Social Controversy"),
        font=dict(size=15),
    )
)

# Relabel legend entries by the category value each trace represents rather
# than by trace position: plotly express names each trace after the string form
# of its colour category ('0' / '1'). A missing class can therefore no longer
# shift the labels, and an unexpected extra trace keeps its own name instead of
# raising an IndexError.
name_list = ['No', 'Yes']
label_by_value = {str(value): label for value, label in enumerate(name_list)}
for trace in fig_social.data:
    trace.update(name=label_by_value.get(trace.name, trace.name))

fig_social.show()

# NOTE(review): export is disabled. The path below belongs to the environmental
# figure, so choose a social-specific file name before re-enabling it.
#fig_social.write_image("images/descriptives/environmental_by_esg_year2.png")

In [55]:
# Number of observations per (social controversy flag, year) in df_merged2.
df_no_lagged = (
    df_merged2.groupby(by=['Social_controversy_binary', 'year'])['ESG Score']
    .count()
    .to_frame(name='Count')
    .reset_index()
)

# int -> category, so the flag can serve as a discrete colour grouping.
df_no_lagged['Social_controversy_binary'] = (
    df_no_lagged['Social_controversy_binary']
    .astype(int)
    .astype('category')
)

In [56]:
# Plot social controversy counts per year (not lagged).
# One overlaid bar series per value of the binary controversy flag.
fig_social = px.bar(
    df_no_lagged,
    x='year',
    y='Count',
    color='Social_controversy_binary',
    barmode='overlay',
    opacity=1.0,
    width=800,
    height=600,
)

# `title` is passed before `title_font` on purpose: assigning a plain string
# title replaces the axis title object, so the font has to be applied afterwards.
fig_social.update_yaxes(title='Count', dtick=1000, title_font=dict(size=18),
                        tickfont=dict(size=20))
fig_social.update_xaxes(title='Year', dtick=2, title_font=dict(size=18),
                        tickfont=dict(size=20))

# Single, unambiguous legend specification (previously `title=None` and
# `legend_title=...` were both passed in the same call).
fig_social.update_layout(
    legend=dict(
        title=dict(text="Social Controversy"),
        font=dict(size=15),
    )
)

# Relabel legend entries by the category value each trace represents rather
# than by trace position: plotly express names each trace after the string form
# of its colour category ('0' / '1'). A missing class can therefore no longer
# shift the labels, and an unexpected extra trace keeps its own name instead of
# raising an IndexError.
name_list = ['No', 'Yes']
label_by_value = {str(value): label for value, label in enumerate(name_list)}
for trace in fig_social.data:
    trace.update(name=label_by_value.get(trace.name, trace.name))

fig_social.show()

# NOTE(review): export is disabled. The path below belongs to the environmental
# figure, so choose a social-specific file name before re-enabling it.
#fig_social.write_image("images/descriptives/environmental_by_esg_year2.png")

#### Social

In [57]:
# Observation counts per social controversy flag: lagged (df_merged) and
# not lagged (df_merged2).
df_lagged = (
    df_merged.groupby(by=['Social_controversy_binary'])['ESG Score']
    .count()
    .to_frame(name='Count')
    .reset_index()
)
df_lagged['Social_controversy_binary'] = (
    df_lagged['Social_controversy_binary'].astype(int).astype('category')
)

df_no_lagged = (
    df_merged2.groupby(by=['Social_controversy_binary'])['ESG Score']
    .count()
    .to_frame(name='Count')
    .reset_index()
)
df_no_lagged['Social_controversy_binary'] = (
    df_no_lagged['Social_controversy_binary'].astype(int).astype('category')
)

In [58]:
# Add each class's share of all observations (a fraction between 0 and 1);
# it is formatted as a percentage label when the bars are drawn.
for counts_frame in (df_lagged, df_no_lagged):
    counts_frame['percent'] = counts_frame['Count'] / counts_frame['Count'].sum()

In [59]:
# Combine the two distributions in one grouped bar chart: class counts of the
# social controversy flag for the lagged and the non-lagged data, each bar
# labelled with its share of the respective total.
fig = go.Figure(
    layout=dict(
        # Axis title fonts are set through the nested `title.font` object.
        # The previous `titlefont` key is a deprecated alias that newer plotly
        # releases no longer accept.
        yaxis=dict(
            title=dict(text='Count', font=dict(size=20)),
            dtick=5000,
            tickfont=dict(size=20),
        ),
        xaxis=dict(
            title=dict(text='Social Controversy Binary', font=dict(size=20)),
            dtick=2,
            tickfont=dict(size=20),
            # Show the 0/1 flag as readable labels.
            tickvals=[0, 1],
            ticktext=['No', 'Yes'],
        ),
        legend=dict(title="Type of Distribution"),
    )
)

fig.add_trace(
    go.Bar(
        x=df_lagged.Social_controversy_binary,
        y=df_lagged.Count,
        # `percent` holds a fraction; scale to percent with two decimals.
        text=df_lagged['percent'].apply(lambda x: '{0:2.2f}%'.format(x * 100)),
        name="Lagged"
    )
)

fig.add_trace(
    go.Bar(
        x=df_no_lagged.Social_controversy_binary,
        y=df_no_lagged.Count,
        text=df_no_lagged['percent'].apply(lambda x: '{0:2.2f}%'.format(x * 100)),
        name="Not Lagged"
    )
)

fig.update_layout(
    autosize=False,
    width=800,
    height=600)

fig.show()

fig.write_image("images/descriptives/social_lagged_no_lagged.png")

#### Environmental

In [60]:
# Create dataframes for plots: observation counts per environmental controversy
# flag, lagged (df_merged) and not lagged (df_merged2).
df_lagged = (
    df_merged.groupby(by=['Environmental_controversy_binary'])['ESG Score']
    .count()
    .to_frame(name='Count')
    .reset_index()
)
df_lagged['Environmental_controversy_binary'] = (
    df_lagged['Environmental_controversy_binary'].astype(int).astype('category')
)

df_no_lagged = (
    df_merged2.groupby(by=['Environmental_controversy_binary'])['ESG Score']
    .count()
    .to_frame(name='Count')
    .reset_index()
)
df_no_lagged['Environmental_controversy_binary'] = (
    df_no_lagged['Environmental_controversy_binary'].astype(int).astype('category')
)

# Add each class's share of all observations (a fraction between 0 and 1),
# used as the bar label.
for counts_frame in (df_lagged, df_no_lagged):
    counts_frame['percent'] = counts_frame['Count'] / counts_frame['Count'].sum()

In [61]:
# Combine the two distributions in one grouped bar chart: class counts of the
# environmental controversy flag for the lagged and the non-lagged data, each
# bar labelled with its share of the respective total.
fig = go.Figure(
    layout=dict(
        # Axis title fonts are set through the nested `title.font` object.
        # The previous `titlefont` key is a deprecated alias that newer plotly
        # releases no longer accept.
        yaxis=dict(
            title=dict(text='Count', font=dict(size=20)),
            dtick=5000,
            tickfont=dict(size=20),
        ),
        xaxis=dict(
            title=dict(text='Environmental Controversy Binary', font=dict(size=20)),
            dtick=2,
            tickfont=dict(size=20),
            # Show the 0/1 flag as readable labels.
            tickvals=[0, 1],
            ticktext=['No', 'Yes'],
        ),
        legend=dict(title="Type of Distribution"),
    )
)

fig.add_trace(
    go.Bar(
        x=df_lagged.Environmental_controversy_binary,
        y=df_lagged.Count,
        # `percent` holds a fraction; scale to percent with two decimals.
        text=df_lagged['percent'].apply(lambda x: '{0:2.2f}%'.format(x * 100)),
        name="Lagged"
    )
)

fig.add_trace(
    go.Bar(
        x=df_no_lagged.Environmental_controversy_binary,
        y=df_no_lagged.Count,
        text=df_no_lagged['percent'].apply(lambda x: '{0:2.2f}%'.format(x * 100)),
        name="Not Lagged"
    )
)

fig.update_layout(
    autosize=False,
    width=800,
    height=600)

fig.show()

fig.write_image("images/descriptives/environmental_lagged_no_lagged.png")

#### Governance

In [62]:
# Create dataframes for plots: observation counts per governance controversy
# flag, lagged (df_merged) and not lagged (df_merged2).
df_lagged = (
    df_merged.groupby(by=['Governance_controversy_binary'])['ESG Score']
    .count()
    .to_frame(name='Count')
    .reset_index()
)
df_lagged['Governance_controversy_binary'] = (
    df_lagged['Governance_controversy_binary'].astype(int).astype('category')
)

df_no_lagged = (
    df_merged2.groupby(by=['Governance_controversy_binary'])['ESG Score']
    .count()
    .to_frame(name='Count')
    .reset_index()
)
df_no_lagged['Governance_controversy_binary'] = (
    df_no_lagged['Governance_controversy_binary'].astype(int).astype('category')
)

# Add each class's share of all observations (a fraction between 0 and 1),
# used as the bar label.
for counts_frame in (df_lagged, df_no_lagged):
    counts_frame['percent'] = counts_frame['Count'] / counts_frame['Count'].sum()

In [63]:
# Combine the two distributions in one grouped bar chart: class counts of the
# governance controversy flag for the lagged and the non-lagged data, each bar
# labelled with its share of the respective total.
fig = go.Figure(
    layout=dict(
        # Axis title fonts are set through the nested `title.font` object.
        # The previous `titlefont` key is a deprecated alias that newer plotly
        # releases no longer accept.
        yaxis=dict(
            title=dict(text='Count', font=dict(size=20)),
            dtick=5000,
            tickfont=dict(size=20),
        ),
        xaxis=dict(
            title=dict(text='Governance Controversy Binary', font=dict(size=20)),
            dtick=2,
            tickfont=dict(size=20),
            # Show the 0/1 flag as readable labels.
            tickvals=[0, 1],
            ticktext=['No', 'Yes'],
        ),
        legend=dict(title="Type of Distribution"),
    )
)

fig.add_trace(
    go.Bar(
        x=df_lagged.Governance_controversy_binary,
        y=df_lagged.Count,
        # `percent` holds a fraction; scale to percent with two decimals.
        text=df_lagged['percent'].apply(lambda x: '{0:2.2f}%'.format(x * 100)),
        name="Lagged"
    )
)

fig.add_trace(
    go.Bar(
        x=df_no_lagged.Governance_controversy_binary,
        y=df_no_lagged.Count,
        text=df_no_lagged['percent'].apply(lambda x: '{0:2.2f}%'.format(x * 100)),
        name="Not Lagged"
    )
)

fig.update_layout(
    autosize=False,
    width=800,
    height=600)

fig.show()

fig.write_image("images/descriptives/governance_lagged_no_lagged.png")